Repository: Cinnamon/kotaemon
Branch: main
Commit: 155e590720f9
Files: 363
Total size: 2.3 MB

Directory structure:
gitextract_waa09c9u/

├── .commitlintrc
├── .dockerignore
├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   └── feature_request.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       ├── auto-bump-and-release.yaml
│       ├── build-push-docker.yaml
│       ├── pr-lint.yaml
│       ├── style-check.yaml
│       └── unit-test.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE.txt
├── README.md
├── app.py
├── doc_env_reqs.txt
├── docs/
│   ├── about.md
│   ├── development/
│   │   ├── contributing.md
│   │   ├── create-a-component.md
│   │   ├── data-components.md
│   │   ├── index.md
│   │   └── utilities.md
│   ├── extra/
│   │   └── css/
│   │       └── code_select.css
│   ├── index.md
│   ├── local_model.md
│   ├── online_install.md
│   ├── pages/
│   │   └── app/
│   │       ├── customize-flows.md
│   │       ├── ext/
│   │       │   └── user-management.md
│   │       ├── features.md
│   │       ├── functional-description.md
│   │       ├── index/
│   │       │   └── file.md
│   │       └── settings/
│   │           ├── overview.md
│   │           └── user-settings.md
│   ├── scripts/
│   │   ├── generate_examples_docs.py
│   │   └── generate_reference_docs.py
│   ├── theme/
│   │   ├── assets/
│   │   │   └── pymdownx-extras/
│   │   │       ├── extra-fb5a2a1c86.css
│   │   │       ├── extra-loader-MCFnu0Wd.js
│   │   │       ├── material-extra-3rdparty-E-i8w1WA.js
│   │   │       └── material-extra-theme-TVq-kNRT.js
│   │   ├── main.html
│   │   └── partials/
│   │       ├── footer.html
│   │       ├── header.html
│   │       └── libs.html
│   └── usage.md
├── flowsettings.py
├── fly.toml
├── launch.sh
├── libs/
│   ├── kotaemon/
│   │   ├── README.md
│   │   ├── kotaemon/
│   │   │   ├── __init__.py
│   │   │   ├── agents/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── io/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── base.py
│   │   │   │   ├── langchain_based.py
│   │   │   │   ├── react/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── agent.py
│   │   │   │   │   └── prompt.py
│   │   │   │   ├── rewoo/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── agent.py
│   │   │   │   │   ├── planner.py
│   │   │   │   │   ├── prompt.py
│   │   │   │   │   └── solver.py
│   │   │   │   ├── tools/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── google.py
│   │   │   │   │   ├── llm.py
│   │   │   │   │   ├── mcp.py
│   │   │   │   │   └── wikipedia.py
│   │   │   │   └── utils.py
│   │   │   ├── base/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── component.py
│   │   │   │   └── schema.py
│   │   │   ├── chatbot/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── simple_respondent.py
│   │   │   ├── cli.py
│   │   │   ├── contribs/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── docs.py
│   │   │   │   └── promptui/
│   │   │   │       ├── .gitignore
│   │   │   │       ├── __init__.py
│   │   │   │       ├── base.py
│   │   │   │       ├── cli.py
│   │   │   │       ├── config.py
│   │   │   │       ├── export.py
│   │   │   │       ├── logs.py
│   │   │   │       ├── themes.py
│   │   │   │       ├── tunnel.py
│   │   │   │       └── ui/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── blocks.py
│   │   │   │           ├── chat.py
│   │   │   │           └── pipeline.py
│   │   │   ├── embeddings/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── endpoint_based.py
│   │   │   │   ├── fastembed.py
│   │   │   │   ├── langchain_based.py
│   │   │   │   ├── openai.py
│   │   │   │   ├── tei_endpoint_embed.py
│   │   │   │   └── voyageai.py
│   │   │   ├── indices/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── extractors/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── doc_parsers.py
│   │   │   │   ├── ingests/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── files.py
│   │   │   │   ├── qa/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── citation.py
│   │   │   │   │   ├── citation_qa.py
│   │   │   │   │   ├── citation_qa_inline.py
│   │   │   │   │   ├── format_context.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── rankings/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── cohere.py
│   │   │   │   │   ├── llm.py
│   │   │   │   │   ├── llm_scoring.py
│   │   │   │   │   └── llm_trulens.py
│   │   │   │   ├── retrievers/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── jina_web_search.py
│   │   │   │   │   └── tavily_web_search.py
│   │   │   │   ├── splitters/
│   │   │   │   │   └── __init__.py
│   │   │   │   └── vectorindex.py
│   │   │   ├── llms/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── branching.py
│   │   │   │   ├── chats/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   ├── endpoint_based.py
│   │   │   │   │   ├── langchain_based.py
│   │   │   │   │   ├── llamacpp.py
│   │   │   │   │   └── openai.py
│   │   │   │   ├── completions/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base.py
│   │   │   │   │   └── langchain_based.py
│   │   │   │   ├── cot.py
│   │   │   │   ├── linear.py
│   │   │   │   └── prompts/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── base.py
│   │   │   │       └── template.py
│   │   │   ├── loaders/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── adobe_loader.py
│   │   │   │   ├── azureai_document_intelligence_loader.py
│   │   │   │   ├── base.py
│   │   │   │   ├── composite_loader.py
│   │   │   │   ├── docling_loader.py
│   │   │   │   ├── docx_loader.py
│   │   │   │   ├── excel_loader.py
│   │   │   │   ├── html_loader.py
│   │   │   │   ├── mathpix_loader.py
│   │   │   │   ├── ocr_loader.py
│   │   │   │   ├── pdf_loader.py
│   │   │   │   ├── txt_loader.py
│   │   │   │   ├── unstructured_loader.py
│   │   │   │   ├── utils/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── adobe.py
│   │   │   │   │   ├── box.py
│   │   │   │   │   ├── gpt4v.py
│   │   │   │   │   ├── pdf_ocr.py
│   │   │   │   │   └── table.py
│   │   │   │   └── web_loader.py
│   │   │   ├── parsers/
│   │   │   │   ├── __init__.py
│   │   │   │   └── regex_extractor.py
│   │   │   ├── rerankings/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── cohere.py
│   │   │   │   ├── tei_fast_rerank.py
│   │   │   │   └── voyageai.py
│   │   │   └── storages/
│   │   │       ├── __init__.py
│   │   │       ├── docstores/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── base.py
│   │   │       │   ├── elasticsearch.py
│   │   │       │   ├── in_memory.py
│   │   │       │   ├── lancedb.py
│   │   │       │   └── simple_file.py
│   │   │       └── vectorstores/
│   │   │           ├── __init__.py
│   │   │           ├── base.py
│   │   │           ├── chroma.py
│   │   │           ├── in_memory.py
│   │   │           ├── lancedb.py
│   │   │           ├── milvus.py
│   │   │           ├── qdrant.py
│   │   │           └── simple_file.py
│   │   ├── pyproject.toml
│   │   ├── pytest.ini
│   │   └── tests/
│   │       ├── __init__.py
│   │       ├── _test_multimodal_reader.py
│   │       ├── conftest.py
│   │       ├── resources/
│   │       │   ├── dummy.docx
│   │       │   ├── dummy.mhtml
│   │       │   ├── dummy.xlsx
│   │       │   ├── embedding_openai.json
│   │       │   ├── embedding_openai_batch.json
│   │       │   ├── fullocr_sample_output.json
│   │       │   ├── ggml-vocab-llama.gguf
│   │       │   ├── html/
│   │       │   │   └── dummy.html
│   │       │   └── policy.md
│   │       ├── simple_pipeline.py
│   │       ├── test_agent.py
│   │       ├── test_composite.py
│   │       ├── test_cot.py
│   │       ├── test_docstores.py
│   │       ├── test_documents.py
│   │       ├── test_embedding_models.py
│   │       ├── test_indexing_retrieval.py
│   │       ├── test_ingestor.py
│   │       ├── test_llms_chat_models.py
│   │       ├── test_llms_completion_models.py
│   │       ├── test_mcp_manager.py
│   │       ├── test_mcp_tools.py
│   │       ├── test_post_processing.py
│   │       ├── test_prompt.py
│   │       ├── test_promptui.py
│   │       ├── test_reader.py
│   │       ├── test_reranking.py
│   │       ├── test_splitter.py
│   │       ├── test_table_reader.py
│   │       ├── test_telemetry.py
│   │       ├── test_template.py
│   │       ├── test_tools.py
│   │       └── test_vectorstore.py
│   └── ktem/
│       ├── .gitignore
│       ├── MANIFEST.in
│       ├── alembic.ini
│       ├── ktem/
│       │   ├── __init__.py
│       │   ├── app.py
│       │   ├── assets/
│       │   │   ├── __init__.py
│       │   │   ├── css/
│       │   │   │   └── main.css
│       │   │   ├── js/
│       │   │   │   ├── main.js
│       │   │   │   └── pdf_viewer.js
│       │   │   ├── md/
│       │   │   │   ├── about.md
│       │   │   │   ├── changelogs.md
│       │   │   │   └── usage.md
│       │   │   └── theme.py
│       │   ├── components.py
│       │   ├── db/
│       │   │   ├── __init__.py
│       │   │   ├── base_models.py
│       │   │   ├── engine.py
│       │   │   └── models.py
│       │   ├── embeddings/
│       │   │   ├── __init__.py
│       │   │   ├── db.py
│       │   │   ├── manager.py
│       │   │   └── ui.py
│       │   ├── exceptions.py
│       │   ├── extension_protocol.py
│       │   ├── index/
│       │   │   ├── __init__.py
│       │   │   ├── base.py
│       │   │   ├── file/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── base.py
│       │   │   │   ├── exceptions.py
│       │   │   │   ├── graph/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── graph_index.py
│       │   │   │   │   ├── light_graph_index.py
│       │   │   │   │   ├── lightrag_pipelines.py
│       │   │   │   │   ├── nano_graph_index.py
│       │   │   │   │   ├── nano_pipelines.py
│       │   │   │   │   ├── pipelines.py
│       │   │   │   │   └── visualize.py
│       │   │   │   ├── index.py
│       │   │   │   ├── knet/
│       │   │   │   │   ├── __init__.py
│       │   │   │   │   ├── knet_index.py
│       │   │   │   │   └── pipelines.py
│       │   │   │   ├── pipelines.py
│       │   │   │   ├── ui.py
│       │   │   │   └── utils.py
│       │   │   ├── manager.py
│       │   │   ├── models.py
│       │   │   └── ui.py
│       │   ├── llms/
│       │   │   ├── __init__.py
│       │   │   ├── db.py
│       │   │   ├── manager.py
│       │   │   └── ui.py
│       │   ├── main.py
│       │   ├── mcp/
│       │   │   ├── __init__.py
│       │   │   ├── db.py
│       │   │   ├── manager.py
│       │   │   └── ui.py
│       │   ├── pages/
│       │   │   ├── __init__.py
│       │   │   ├── chat/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── chat_panel.py
│       │   │   │   ├── chat_suggestion.py
│       │   │   │   ├── common.py
│       │   │   │   ├── control.py
│       │   │   │   ├── demo_hint.py
│       │   │   │   ├── paper_list.py
│       │   │   │   └── report.py
│       │   │   ├── help.py
│       │   │   ├── login.py
│       │   │   ├── resources/
│       │   │   │   ├── __init__.py
│       │   │   │   └── user.py
│       │   │   ├── settings.py
│       │   │   └── setup.py
│       │   ├── reasoning/
│       │   │   ├── __init__.py
│       │   │   ├── base.py
│       │   │   ├── prompt_optimization/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── decompose_question.py
│       │   │   │   ├── fewshot_rewrite_question.py
│       │   │   │   ├── mindmap.py
│       │   │   │   ├── rephrase_question_train.json
│       │   │   │   ├── rewrite_question.py
│       │   │   │   ├── suggest_conversation_name.py
│       │   │   │   └── suggest_followup_chat.py
│       │   │   ├── react.py
│       │   │   ├── rewoo.py
│       │   │   └── simple.py
│       │   ├── rerankings/
│       │   │   ├── __init__.py
│       │   │   ├── db.py
│       │   │   ├── manager.py
│       │   │   └── ui.py
│       │   ├── settings.py
│       │   └── utils/
│       │       ├── __init__.py
│       │       ├── commands.py
│       │       ├── conversation.py
│       │       ├── file.py
│       │       ├── generator.py
│       │       ├── hf_papers.py
│       │       ├── lang.py
│       │       ├── plantuml.py
│       │       ├── rate_limit.py
│       │       ├── render.py
│       │       └── visualize_cited.py
│       ├── ktem_tests/
│       │   ├── __init__.py
│       │   ├── resources/
│       │   │   └── embedding_openai.json
│       │   └── test_qa.py
│       ├── migrations/
│       │   ├── README
│       │   ├── env.py
│       │   ├── script.py.mako
│       │   └── versions/
│       │       └── .keep
│       ├── pyproject.toml
│       └── requirements.txt
├── mkdocs.yml
├── pyproject.toml
├── scripts/
│   ├── download_pdfjs.sh
│   ├── migrate/
│   │   ├── __init__.py
│   │   └── migrate_chroma_db.py
│   ├── run_linux.sh
│   ├── run_macos.sh
│   ├── run_windows.bat
│   ├── serve_local.py
│   ├── server_llamacpp_linux.sh
│   ├── server_llamacpp_macos.sh
│   ├── server_llamacpp_windows.bat
│   ├── update_linux.sh
│   ├── update_macos.sh
│   └── update_windows.bat
├── settings.yaml.example
├── sso_app.py
├── sso_app_demo.py
└── templates/
    ├── component-default/
    │   └── README.md
    └── project-default/
        ├── cookiecutter.json
        └── {{cookiecutter.project_name}}/
            ├── .gitattributes
            ├── .gitignore
            ├── .pre-commit-config.yaml
            ├── README.md
            ├── setup.py
            ├── tests/
            │   └── __init__.py
            └── {{cookiecutter.project_name}}/
                ├── __init__.py
                └── pipeline.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .commitlintrc
================================================
{
  "extends": ["@commitlint/config-conventional"],
  "defaultIgnores": true,
  "rules": {
    "body-leading-blank": [1, "always"],
    "body-max-line-length": [2, "always", 100],
    "footer-leading-blank": [1, "always"],
    "footer-max-line-length": [2, "always", 10000],
    "header-max-length": [2, "always", 200],
    "subject-case": [
      2,
      "never",
      []
    ],
    "subject-empty": [2, "never"],
    "subject-full-stop": [2, "never", "."],
    "type-case": [2, "always", "lower-case"],
    "type-empty": [2, "never"],
    "type-enum": [
      2,
      "always",
      [
        "build",
        "chore",
        "ci",
        "docs",
        "feat",
        "fix",
        "perf",
        "refactor",
        "revert",
        "style",
        "test"
      ]
    ]
  }
}


================================================
FILE: .dockerignore
================================================
.github/
.git/
.mypy_cache/
__pycache__/
ktem_app_data/
env/
.pre-commit-config.yaml
.commitlintrc
.gitignore
.gitattributes
README.md
*.zip
*.sh

!/launch.sh


================================================
FILE: .gitattributes
================================================
*.bat   text eol=crlf


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: "Bug Report"
description: Report something that is not working as expected
title: "[BUG] "
labels: ["bug"]
body:
  - type: markdown
    attributes:
      value: |
        *Please fill this form with as much information as possible.*
  - type: textarea
    id: description
    attributes:
      label: "Description"
      description: Please enter an explicit description of your issue
      placeholder: Short and explicit description of your incident...
    validations:
      required: true
  - type: textarea
    id: reprod
    attributes:
      label: "Reproduction steps"
      description: Please list the exact steps needed to reproduce the issue
      value: |
        1. Go to '...'
        2. Click on '....'
        3. Scroll down to '....'
        4. See error
      render: bash
    validations:
      required: true
  - type: textarea
    id: screenshot
    attributes:
      label: "Screenshots"
      description: If applicable, add screenshots to help explain your problem.
      value: |
        ![DESCRIPTION](LINK.png)
      render: bash
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: "Logs"
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: bash
    validations:
      required: false
  - type: dropdown
    id: browsers
    attributes:
      label: "Browsers"
      description: Which browsers are you seeing the problem on?
      multiple: true
      options:
        - Firefox
        - Chrome
        - Safari
        - Microsoft Edge
        - Opera
        - Brave
        - Other
    validations:
      required: false
  - type: dropdown
    id: os
    attributes:
      label: "OS"
      description: Which operating systems are impacted?
      multiple: true
      options:
        - Windows
        - MacOS
        - Linux
        - Other
    validations:
      required: false
  - type: textarea
    id: additional_information
    attributes:
      label: "Additional information"
      description: Add any relevant information or context.
      placeholder:
    validations:
      required: false


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: "Feature Request"
description: Brainstorm and propose new features for the project
title: "[REQUEST] "
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        *Please fill this form with as much information as possible.*
  - type: textarea
    id: reference_issues
    attributes:
      label: "Reference Issues"
      description: List any existing issues related to this request
      placeholder: "#Issues IDs"
    validations:
      required: false
  - type: textarea
    id: summary
    attributes:
      label: "Summary"
      description: Provide a brief explanation of the feature
      placeholder: Describe in a few lines your feature request
    validations:
      required: true
  - type: textarea
    id: basic_example
    attributes:
      label: "Basic Example"
      description: Indicate here some basic examples of your feature.
      placeholder: A few specific words about your feature request.
    validations:
      required: true
  - type: textarea
    id: drawbacks
    attributes:
      label: "Drawbacks"
      description: What are the drawbacks/impacts of your feature request?
      placeholder: Identify the drawbacks and impacts while being neutral on your feature request
    validations:
      required: true
  - type: textarea
    id: additional_information
    attributes:
      label: "Additional information"
      description: Add any additional information that you think is important for your feature request
      placeholder:
    validations:
      required: false


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## Description

- Please include a summary of the changes and the related issue.
- Fixes # (issue)

## Type of change

- [ ] New features (non-breaking change).
- [ ] Bug fix (non-breaking change).
- [ ] Breaking change (fix or feature that would cause existing functionality not to work as expected).

## Checklist

- [ ] I have performed a self-review of my code.
- [ ] I have added thorough tests if it is a core feature.
- [ ] There is a reference to the original bug report and related work.
- [ ] I have commented on my code, particularly in hard-to-understand areas.
- [ ] The feature is well documented.


================================================
FILE: .github/workflows/auto-bump-and-release.yaml
================================================
name: Auto Bump and Release

on:
  push:
    branches:
      - main

jobs:
  auto-bump-and-release:
    runs-on: ubuntu-latest
    steps:
      - name: Clone the repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Update Application Version
        id: update-version
        uses: anothrNick/github-tag-action@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WITH_V: true
          DEFAULT_BUMP: patch
          MAJOR_STRING_TOKEN: "bump:major"
          MINOR_STRING_TOKEN: "bump:minor"
          PATCH_STRING_TOKEN: "bump:patch"
      - name: Create release for ${{ steps.update-version.outputs.new_tag }}
        # This `if` condition has to be repeated on each following step because
        # GitHub Actions doesn't support early stopping of the remaining steps
        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}
        run: |
          echo Create release folder
          mkdir kotaemon-app
          echo ${{ steps.update-version.outputs.new_tag }} > kotaemon-app/VERSION
          cp LICENSE.txt kotaemon-app/
          cp flowsettings.py kotaemon-app/
          cp app.py kotaemon-app/
          cp .env.example kotaemon-app/.env
          cp -r scripts kotaemon-app/
          mkdir -p kotaemon-app/libs/ktem/ktem/
          cp -r libs/ktem/ktem/assets kotaemon-app/libs/ktem/ktem/

          tree kotaemon-app
          zip -r kotaemon-app.zip kotaemon-app
      - name: Release ${{ steps.update-version.outputs.new_tag }}
        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}
        uses: softprops/action-gh-release@v2
        with:
          files: kotaemon-app.zip
          fail_on_unmatched_files: true
          token: ${{ secrets.GITHUB_TOKEN }}
          generate_release_notes: true
          tag_name: ${{ steps.update-version.outputs.new_tag }}
          make_latest: true
      - name: Setup latest branch locally without switching current branch
        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}
        run: git fetch origin latest:latest
      - name: Update latest branch
        if: ${{ steps.update-version.outputs.new_tag != steps.update-version.outputs.old_tag }}
        run: |
          git branch -f latest tags/${{ steps.update-version.outputs.new_tag }}
          git checkout latest
          git push -f -u origin latest


================================================
FILE: .github/workflows/build-push-docker.yaml
================================================
name: Build and Push Docker Image

on:
  release:
    types:
      - created

  push:
    tags:
      - "v[0-9]+.[0-9]+.[0-9]+"

  workflow_dispatch:

env:
  REGISTRY: ghcr.io

jobs:
  build:
    name: Build and push container
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      attestations: write
      id-token: write
    strategy:
      matrix:
        target:
          - lite
          - full
          - ollama
      # The maximum number of jobs that can run simultaneously
      max-parallel: 1
    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: true

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      - name: Set repository and image name
        run: |
          echo "FULL_IMAGE_NAME=${{ env.REGISTRY }}/${IMAGE_NAME,,}" >>${GITHUB_ENV}
        env:
          IMAGE_NAME: "${{ github.repository }}"

      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:latest
          platforms: arm64,arm

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.FULL_IMAGE_NAME }}
          tags: |
            # branch
            type=ref,event=branch,suffix=-${{ matrix.target }}
            # semver with suffix for lite/full/ollama targets
            type=semver,pattern={{version}},suffix=-${{ matrix.target }}
            # latest tag with suffix for lite/full/ollama targets
            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, 'pre') }},suffix=-${{ matrix.target }}
          flavor: |
            # This is disabled here so we can use the raw form above
            latest=false
            # Suffix is not used here since there's no way to disable it above

      - name: Log in to the Container registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build docker image
        uses: docker/build-push-action@v6
        with:
          file: Dockerfile
          context: .
          push: true
          platforms: linux/amd64, linux/arm64
          tags: |
            ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          target: ${{ matrix.target }}
          cache-from: type=gha
          cache-to: type=gha,mode=max


================================================
FILE: .github/workflows/pr-lint.yaml
================================================
name: "Lint PR"

on:
  pull_request:
    types:
      - opened
      - edited
      - synchronize

permissions:
  pull-requests: write

jobs:
  pr-title:
    name: Validate PR title
    runs-on: ubuntu-latest
    permissions: write-all
    steps:
      - uses: amannn/action-semantic-pull-request@v5
        id: lint_pr_title
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - uses: marocchino/sticky-pull-request-comment@v2
        # When the previous step fails, the workflow would stop. By adding this
        # condition you can continue the execution with the populated error message.
        if: always() && (steps.lint_pr_title.outputs.error_message != null)
        with:
          header: pr-title-lint-error
          message: |
            Hey there and thank you for opening this pull request! 👋🏼

            We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted.
            Details:
            ```
            ${{ steps.lint_pr_title.outputs.error_message }}
            ```

      # Delete a previous comment when the issue has been resolved
      - if: ${{ steps.lint_pr_title.outputs.error_message == null }}
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: pr-title-lint-error
          delete: true

  commitlint:
    if: false # Disable this job for now
    name: Validate commit messages
    runs-on: ubuntu-latest
    permissions: write-all
    steps:
      - uses: actions/checkout@v4
      - uses: wagoid/commitlint-github-action@v6
        id: commitlint
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          configFile: ./.commitlintrc
      - uses: buildingcash/json-to-markdown-table-action@v1
        if: always() && (steps.commitlint.outcome != 'success')
        id: table
        with:
          json: ${{ steps.commitlint.outputs.results }}
      - uses: marocchino/sticky-pull-request-comment@v2
        if: always() && (steps.commitlint.outcome != 'success')
        with:
          header: commitlint-error
          message: |
            **All commits** in this PR need to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and [.commitlintrc](${{ github.server_url }}/${{ github.repository }}/blob/${{ github.head_ref || github.ref_name }}/.commitlintrc).
            Details:
            ${{ steps.table.outputs.table }}

      - if: ${{ steps.commitlint.outcome == 'success' }}
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          header: commitlint-error
          delete: true


================================================
FILE: .github/workflows/style-check.yaml
================================================
name: style-check

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main, develop]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - name: Clone the repo
        uses: actions/checkout@v4
      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: run pre-commit
        uses: pre-commit/action@v3.0.0


================================================
FILE: .github/workflows/unit-test.yaml
================================================
name: unit-test

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]

env:
  THEFLOW_TEMP_PATH: ./tmp

jobs:
  unit-test:
    # if: false # temporarily disable this job due to legacy interface
    # TODO: enable this job after the new interface is ready
    if: ${{ !cancelled() }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 20
    defaults:
      run:
        shell: ${{ matrix.shell }}
    strategy:
      matrix:
        python-version: ["3.10", "3.11"]
        include:
          - os: ubuntu-latest
            shell: bash
            ACTIVATE_ENV: ". env/bin/activate"
            GITHUB_OUTPUT: "$GITHUB_OUTPUT"
          # - os: windows-latest
          #   shell: pwsh
          #   ACTIVATE_ENV: env/Scripts/activate.ps1
          #   GITHUB_OUTPUT: "$env:GITHUB_OUTPUT"

    name: unit testing with python ${{ matrix.python-version }}
    steps:
      - name: Clone the repo
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Get Head Commit Message
        id: get-head-commit-message
        run: echo "message=$(git show -s --format=%s)" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Check ignore caching
        id: check-ignore-cache
        run: |
          ignore_cache=${{ contains(steps.get-head-commit-message.outputs.message, '[ignore cache]') }}
          echo "check=$ignore_cache" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Set up Python ${{ matrix.python-version }} on ${{ runner.os }}
        uses: actions/setup-python@v4
        id: setup_python
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true

      - name: Get cache key
        id: get-cache-key
        run: |
          pip install "setuptools-git-versioning>=2.0,<3"
          package_version=$(setuptools-git-versioning)
          cache_key="${{ runner.os }}-py${{ matrix.python-version }}-v${package_version}"
          echo "key=$cache_key" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Try to restore dependencies from ${{ steps.get-cache-key.outputs.key }}
        id: restore-dependencies
        if: steps.check-ignore-cache.outputs.check != 'true'
        uses: actions/cache/restore@v3
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ steps.get-cache-key.outputs.key }}
          # fall back to a cache from a previous version to reuse unchanged packages
          restore-keys: ${{ runner.os }}-py${{ matrix.python-version }}

      - name: Check cache hit
        id: check-cache-hit
        run: |
          echo "cache-hit=${{ steps.restore-dependencies.outputs.cache-hit }}"
          echo "cache-matched-key=${{ steps.restore-dependencies.outputs.cache-matched-key }}"
          cache_hit=${{ steps.restore-dependencies.outputs.cache-primary-key == steps.restore-dependencies.outputs.cache-matched-key }}
          echo "check=$cache_hit" | tee -a ${{ matrix.GITHUB_OUTPUT }}

      - name: Install additional dependencies (if any)
        run: |
          uv sync --frozen --no-cache

      - name: New dependencies cache for key ${{ steps.restore-dependencies.outputs.cache-primary-key }}
        if: |
          steps.check-ignore-cache.outputs.check != 'true' &&
          steps.check-cache-hit.outputs.check != 'true'
        uses: actions/cache/save@v3
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ steps.restore-dependencies.outputs.cache-primary-key }}

      - name: Install OS-based packages
        run: |
          sudo apt update -qqy
          sudo apt install -y poppler-utils libpoppler-dev tesseract-ocr

      - name: Test kotaemon with pytest
        run: |
          source .venv/bin/activate
          uv pip show pytest
          cd libs/kotaemon
          pytest


================================================
FILE: .gitignore
================================================
# Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm

activate*
activate/*
kotaemon-env*
.env

### Emacs ###
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*

# Org-mode
.org-id-locations
*_archive

# flymake-mode
*_flymake.*

# eshell files
/eshell/history
/eshell/lastdir

# elpa packages
/elpa/

# reftex files
*.rel

# AUCTeX auto folder
/auto/

# cask packages
.cask/
dist/

# Flycheck
flycheck_*.el

# server auth directory
/server/

# projectiles files
.projectile

# directory configuration
.dir-locals.el

# network security
/network-security.data

### Linux ###

# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

# KDE directory preferences
.directory

# Linux trash folder which might appear on any partition or disk
.Trash-*

# .nfs files are created when an open file is removed but is still being accessed
.nfs*

### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

### macOS Patch ###
# iCloud generated files
*.icloud

### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn.  Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721

# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr

# Sonarlint plugin
# https://plugins.jetbrains.com/plugin/7973-sonarlint
.idea/**/sonarlint/

# SonarQube Plugin
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
.idea/**/sonarIssues.xml

# Markdown Navigator plugin
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
.idea/**/markdown-navigator.xml
.idea/**/markdown-navigator-enh.xml
.idea/**/markdown-navigator/

# Cache file creation bug
# See https://youtrack.jetbrains.com/issue/JBR-2257
.idea/$CACHE_FILE$

# CodeStream plugin
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml

# Azure Toolkit for IntelliJ plugin
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
.idea/**/azureSettings.xml

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# LSP config files
pyrightconfig.json

### Vim ###
# Swap
[._]*.s[a-v][a-z]
# gitignore has no trailing-comment syntax: a "#" after a pattern becomes part
# of the pattern, so this note must sit on its own line.
# Comment out the next line if you don't need vector files.
!*.svg
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]

# Session
Session.vim
Sessionx.vim

# Temporary
.netrwhist
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~

### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets

# Local History for Visual Studio Code
.history/

# Built Visual Studio Code Extensions
*.vsix

### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide

### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db

# Dump file
*.stackdump

# Folder config file
[Dd]esktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp

# Windows shortcuts
*.lnk

# PDF files
*.pdf
!libs/kotaemon/tests/resources/*.pdf

.theflow/

# End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
*.py[coid]

logs/
.gitsecret/keys/random_seed
!*.secret
.envrc
.env

S.gpg-agent*
.vscode/settings.json
examples/example1/assets
storage/*

# Conda and env storages
*install_dir/
doc_env/

# application data
ktem_app_data/
gradio_tmp/


================================================
FILE: .pre-commit-config.yaml
================================================
# pre-commit hook configuration: formatting, linting, type-checking and
# basic hygiene checks that run on staged files before each commit.
repos:
  # Generic hygiene checks (syntax, whitespace, secrets, large files).
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: check-yaml
        # --unsafe: only parse for syntax instead of loading the document,
        # so files using custom tags/constructors do not fail the check.
        args: ["--unsafe"]
      - id: check-toml
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: mixed-line-ending
      - id: detect-aws-credentials
        # Do not fail when the machine has no AWS credential files configured.
        args: ["--allow-missing-credentials"]
      - id: detect-private-key
      - id: check-added-large-files
        # Reject newly added files larger than 750 KB.
        args: ["--maxkb=750"]
      - id: debug-statements
  # Code formatter.
  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
        language_version: python3
  # Import sorting, using the black-compatible profile.
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: ["--profile", "black"]
        language_version: python3.10
  # Linting. Line length 88 matches black's default; E203 (whitespace before
  # ':') is ignored because it conflicts with black's slice formatting.
  - repo: https://github.com/pycqa/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        args: ["--max-line-length", "88", "--extend-ignore", "E203"]
  # Removes unused imports/variables in place; anything under tests/ is skipped.
  - repo: https://github.com/myint/autoflake
    rev: v1.4
    hooks:
      - id: autoflake
        args:
          [
            "--in-place",
            "--remove-unused-variables",
            "--remove-all-unused-imports",
            "--ignore-init-module-imports",
            "--exclude=tests/*",
          ]
  # Formats Markdown and YAML files only.
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v2.7.1
    hooks:
      - id: prettier
        types_or: [markdown, yaml]
  # Static type checking. The hook runs in its own isolated environment, so
  # the stub/typed packages it needs are listed in additional_dependencies.
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: "v1.7.1"
    hooks:
      - id: mypy
        additional_dependencies:
          [
            types-PyYAML==6.0.12.11,
            "types-requests",
            "sqlmodel",
            "types-Markdown",
            "types-cachetools",
            types-tzlocal,
          ]
        args: ["--check-untyped-defs", "--ignore-missing-imports"]
        # Files under the top-level templates/ directory are not type-checked.
        exclude: "^templates/"
  # Spell checking of source and docs.
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
      - id: codespell
        # NOTE(review): tomli is presumably needed so codespell can read its
        # settings from pyproject.toml — confirm where the config lives.
        additional_dependencies:
          - tomli


================================================
FILE: .python-version
================================================
3.10


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

- Demonstrating empathy and kindness toward other people
- Being respectful of differing opinions, viewpoints, and experiences
- Giving and gracefully accepting constructive feedback
- Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
- Focusing on what is best not just for us as individuals, but for the
  overall community

Examples of unacceptable behavior include:

- The use of sexualized language or imagery, and sexual attention or
  advances of any kind
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or email
  address, without their explicit permission
- Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series
of actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Kotaemon

Welcome 👋 to the Kotaemon project! We're thrilled that you're interested in contributing. Whether you're fixing bugs, adding new features, or improving documentation, your efforts are highly appreciated. This guide aims to help you get started with contributing to Kotaemon.

<a href="https://github.com/Cinnamon/kotaemon/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=Cinnamon/kotaemon" />
</a>

### Table of Contents

1. [📖 Code of Conduct](#-code-of-conduct)
2. [🔁 Contributing via Pull Requests](#-contributing-via-pull-requests)
3. [📥 Opening an Issue](#-opening-an-issue)
4. [📝 Commit Messages](#-commit-messages)
5. [🧾 License](#-license)

## 📖 Code of Conduct

Please review our [code of conduct](./CODE_OF_CONDUCT.md), which is in effect at all times. We expect everyone who contributes to this project to honor it.

## 🔁 Contributing via Pull Requests

1. [**Fork the repository**](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo): Click on the [Fork](https://github.com/Cinnamon/kotaemon/fork) button on the repository's page to create a copy of Kotaemon under your GitHub account.

2. [**Clone your code**](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository): Clone your forked repository to your local machine.

3. [**Create new branch**](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop): Create a new branch in your forked repo with a descriptive name that reflects your changes.

```sh
git checkout -b descriptive-name-for-your-changes
```

4. **Set up the development environment**: If you are working on the code, make sure to install the necessary dependencies for development:

```sh
pip install -e "libs/kotaemon[dev]"
```

5. **Make your changes**: Ensure your code follows the project's coding style and passes all test cases.

   - Check the coding style

   ```sh
   pre-commit run --all-files
   ```

   - Run the tests

   ```sh
   pytest libs/kotaemon/tests/
   ```

6. [**Commit your changes**](https://docs.github.com/en/desktop/making-changes-in-a-branch/committing-and-reviewing-changes-to-your-project-in-github-desktop): Once you are done with your changes, add and commit them with clear messages.

```sh
git add your_changes.py
git commit -m "clear message describing your changes"
git push -u origin descriptive-name-for-your-changes
```

7. [**Create a pull request**](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request): When you are satisfied with your changes, [submit a pull request](https://github.com/Cinnamon/kotaemon/compare) from your forked repository to Kotaemon repository. In the pull request, provide a clear description of your changes and any related issues. For the title of the pull request, please refer to our [commit messages convention](#-commit-messages).

8. **Wait for reviews**: Wait for the maintainers to review your pull request. If everything is okay, your changes will be merged into the Kotaemon project.

### GitHub Actions CI Tests

All pull requests must pass the [GitHub Actions Continuous Integration (CI)](https://docs.github.com/en/actions/about-github-actions/about-continuous-integration-with-github-actions) tests before they can be merged. These tests include coding-style checks, PR title validation, unit tests, etc. to ensure that your changes meet the project's quality standards. Please review and fix any CI failures that arise.

## 📥 Opening an Issue

Before [creating an issue](https://github.com/Cinnamon/kotaemon/issues/new/choose), search through existing issues to ensure you are not opening a duplicate. If you are reporting a bug or issue, please provide a reproducible example to help us quickly identify the problem.

## 📝 Commit Messages

### Overview

We use the [Angular convention](https://www.conventionalcommits.org/en/) for commit messages to maintain consistency and clarity in our project history. Please take a moment to familiarize yourself with this convention before making your first commit.

_For the sake of simplicity, we use [squash merging](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squash-and-merge-your-commits) for pull requests. Therefore, if you contribute via a pull request, just make sure your PR's title, rather than every individual commit, follows this convention._

Commit format:

```sh
<gitmoji> <type>(<scope>): <subject>
<BLANK LINE>
<body>
<BLANK LINE>
<footer>
```

Examples:

```sh
docs(api): update api doc
```

### Commit types

| Types      | Description                                                   |
| :--------- | :------------------------------------------------------------ |
| `feat`     | New features                                                  |
| `fix`      | Bug fix                                                       |
| `docs`     | Documentation only changes                                    |
| `build`    | Changes that affect the build system or external dependencies |
| `chore`    | Something that doesn’t fit the other types                    |
| `ci`       | Changes to our CI configuration files and scripts             |
| `perf`     | Improve performance                                           |
| `refactor` | Refactor code                                                 |
| `revert`   | Revert a previous commit                                      |
| `style`    | Improve structure/format of the code                          |
| `test`     | Add, update or pass tests                                     |

## 🧾 License

All contributions will be licensed under the project's license: [Apache License 2.0](https://github.com/Cinnamon/kotaemon/blob/main/LICENSE.txt).


================================================
FILE: Dockerfile
================================================
# Lite version: base image with the core application and its locked dependencies.
FROM python:3.10-slim AS lite

# Common dependencies
# `autoremove -y` keeps the cleanup step non-interactive: without -y apt-get
# prompts for confirmation whenever it has something to remove, and a build
# has no TTY to answer the prompt.
RUN apt-get update -qqy && \
    apt-get install -y --no-install-recommends \
        ssh \
        git \
        gcc \
        g++ \
        poppler-utils \
        libpoppler-dev \
        unzip \
        curl \
        cargo \
        && \
    apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

# Setup args (platform build arguments supplied by the builder)
ARG TARGETPLATFORM
ARG TARGETARCH

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
# Re-exported as ENV so the value is also visible to later stages and at runtime.
ENV TARGETARCH=${TARGETARCH}

# Create working directory
WORKDIR /app

# Download pdfjs
# The script is copied on its own, ahead of the full source tree, so this
# layer is not invalidated by unrelated source changes.
COPY scripts/download_pdfjs.sh /app/scripts/download_pdfjs.sh
RUN chmod +x /app/scripts/download_pdfjs.sh
ENV PDFJS_PREBUILT_DIR="/app/libs/ktem/ktem/assets/prebuilt/pdfjs-dist"
RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR

# Install uv dependencies
RUN pip install --no-cache-dir "uv"

# Copy contents
COPY . /app
COPY launch.sh /app/launch.sh
# The example env file becomes the image's default configuration.
COPY .env.example /app/.env

# Install pip packages (locked project dependencies into .venv, then pdfservices-sdk from a git branch)
RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/uv  \
    uv sync --frozen --no-cache \
    && uv pip install --python .venv "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

# graphrag is installed only when building for amd64.
RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/uv  \
    if [ "$TARGETARCH" = "amd64" ]; then uv pip install --python .venv "graphrag<=0.3.6" future; fi

ENTRYPOINT ["sh", "/app/launch.sh"]

# Full version: the lite image plus OCR, office-document and media tooling.
FROM lite AS full

# Additional dependencies for full version
# `autoremove -y` keeps the cleanup step non-interactive (a build has no TTY
# to answer apt-get's confirmation prompt).
RUN apt-get update -qqy && \
    apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-jpn \
        libsm6 \
        libxext6 \
        libreoffice \
        ffmpeg \
        libmagic-dev \
        && \
    apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

# Install torch and torchvision for unstructured (CPU-only wheels)
RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/uv  \
    uv pip install --python .venv torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Install additional pip packages
# The extras specifier is quoted so the shell never treats "[...]" as a glob
# pattern, matching how "libs/kotaemon[adv]" is written.
RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/uv  \
    uv pip install --python .venv "libs/kotaemon[adv]" \
    && uv pip install --python .venv "unstructured[all-docs]"

# Install lightRAG
ENV USE_LIGHTRAG=true
RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/uv  \
    uv pip install --python .venv aioboto3 nano-vectordb ollama xxhash "lightrag-hku<=1.3.0"

RUN --mount=type=ssh  \
    --mount=type=cache,target=/root/.cache/uv  \
    uv pip install --python .venv "docling<=2.5.2"

# Download NLTK data from LlamaIndex
# (the import is run at build time for its side effect, so the data is baked into the image)
RUN /app/.venv/bin/python -c "from llama_index.core.readers.base import BaseReader"

ENTRYPOINT ["sh", "/app/launch.sh"]

# Ollama-bundled version: the full image with an Ollama install and a
# pre-pulled embedding model.
FROM full AS ollama

# Install ollama
RUN curl -fsSL https://ollama.com/install.sh | sh

# Pulling a model needs a running server, so the server is started in the
# background for this build step only, given 4 seconds to come up, and then
# the model is pulled so it ends up stored in the image layer.
# RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull qwen2.5:7b
RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull nomic-embed-text

ENTRYPOINT ["sh", "/app/launch.sh"]


================================================
FILE: LICENSE.txt
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
<div align="center">

# kotaemon

An open-source clean & customizable RAG UI for chatting with your documents. Built with both end users and
developers in mind.

![Preview](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/preview-graph.png)

<a href="https://trendshift.io/repositories/11607" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11607" alt="Cinnamon%2Fkotaemon | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

[Live Demo #1](https://huggingface.co/spaces/cin-model/kotaemon) |
[Live Demo #2](https://huggingface.co/spaces/cin-model/kotaemon-demo) |
[Online Install](https://cinnamon.github.io/kotaemon/online_install/) |
[Colab Notebook (Local RAG)](https://colab.research.google.com/drive/1eTfieec_UOowNizTJA1NjawBJH9y_1nn)

[User Guide](https://cinnamon.github.io/kotaemon/) |
[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |
[Feedback](https://github.com/Cinnamon/kotaemon/issues) |
[Contact](mailto:kotaemon.support@cinnamon.is)

[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/release/python-31013/)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
<a href="https://github.com/Cinnamon/kotaemon/pkgs/container/kotaemon" target="_blank">
<img src="https://img.shields.io/badge/docker_pull-kotaemon:latest-brightgreen" alt="docker pull ghcr.io/cinnamon/kotaemon:latest"></a>
![download](https://img.shields.io/github/downloads/Cinnamon/kotaemon/total.svg?label=downloads&color=blue)
<a href='https://huggingface.co/spaces/cin-model/kotaemon-demo'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
<a href="https://hellogithub.com/en/repository/d3141471a0244d5798bc654982b263eb" target="_blank"><img src="https://abroad.hellogithub.com/v1/widgets/recommend.svg?rid=d3141471a0244d5798bc654982b263eb&claim_uid=RLiD9UZ1rEHNaMf&theme=small" alt="Featured｜HelloGitHub" /></a>

</div>

<!-- start-intro -->

## Introduction

This project serves as a functional RAG UI for both end users who want to do QA on their
documents and developers who want to build their own RAG pipeline.
<br>

```yml
+----------------------------------------------------------------------------+
| End users: Those who use apps built with `kotaemon`.                       |
| (You use an app like the one in the demo above)                            |
|     +----------------------------------------------------------------+     |
|     | Developers: Those who build with `kotaemon`.                   |     |
|     | (You have `import kotaemon` somewhere in your project)         |     |
|     |     +----------------------------------------------------+     |     |
|     |     | Contributors: Those who make `kotaemon` better.    |     |     |
|     |     | (You make PR to this repo)                         |     |     |
|     |     +----------------------------------------------------+     |     |
|     +----------------------------------------------------------------+     |
+----------------------------------------------------------------------------+
```

### For end users

- **Clean & Minimalistic UI**: A user-friendly interface for RAG-based QA.
- **Support for Various LLMs**: Compatible with LLM API providers (OpenAI, AzureOpenAI, Cohere, etc.) and local LLMs (via `ollama` and `llama-cpp-python`).
- **Easy Installation**: Simple scripts to get you started quickly.

### For developers

- **Framework for RAG Pipelines**: Tools to build your own RAG-based document QA pipeline.
- **Customizable UI**: See your RAG pipeline in action with the provided UI, built with <a href='https://github.com/gradio-app/gradio'>Gradio <img src='https://img.shields.io/github/stars/gradio-app/gradio'></a>.
- **Gradio Theme**: If you use Gradio for development, check out our theme here: [kotaemon-gradio-theme](https://github.com/lone17/kotaemon-gradio-theme).

## Key Features

- **Host your own document QA (RAG) web-UI**: Support multi-user login, organize your files in private/public collections, collaborate and share your favorite chat with others.

- **Organize your LLM & Embedding models**: Support both local LLMs & popular API providers (OpenAI, Azure, Ollama, Groq).

- **Hybrid RAG pipeline**: Sane default RAG pipeline with hybrid (full-text & vector) retriever and re-ranking to ensure best retrieval quality.

- **Multi-modal QA support**: Perform Question Answering on multiple documents with figures and tables support. Support multi-modal document parsing (selectable options on UI).

- **Advanced citations with document preview**: By default the system will provide detailed citations to ensure the correctness of LLM answers. View your citations (incl. relevance score) directly in the _in-browser PDF viewer_ with highlights. The system warns you when the retrieval pipeline returns low-relevance articles.

- **Support complex reasoning methods**: Use question decomposition to answer your complex/multi-hop question. Support agent-based reasoning with `ReAct`, `ReWOO` and other agents.

- **Configurable settings UI**: You can adjust most important aspects of retrieval & generation process on the UI (incl. prompts).

- **Extensible**: Being built on Gradio, you are free to customize or add any UI elements as you like. Also, we aim to support multiple strategies for document indexing & retrieval. `GraphRAG` indexing pipeline is provided as an example.

![Preview](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/preview.png)

## Installation

> If you are not a developer and just want to use the app, please check out our easy-to-follow [User Guide](https://cinnamon.github.io/kotaemon/). Download the `.zip` file from the [latest release](https://github.com/Cinnamon/kotaemon/releases/latest) to get all the newest features and bug fixes.

### System requirements

1. [Python](https://www.python.org/downloads/) >= 3.10
2. [Docker](https://www.docker.com/): optional, if you [install with Docker](#with-docker-recommended)
3. [Unstructured](https://docs.unstructured.io/open-source/installation/full-installation#full-installation) if you want to process files other than `.pdf`, `.html`, `.mhtml`, and `.xlsx` documents. Installation steps differ depending on your operating system. Please visit the link and follow the specific instructions provided there.

### With Docker (recommended)

1. We support both `lite` & `full` versions of the Docker image. With the `full` version, the extra packages of `unstructured` will be installed, which adds support for additional file types (`.doc`, `.docx`, ...) at the cost of a larger Docker image size. For most users, the `lite` image should work well.

   - To use the `full` version.

     ```bash
     docker run \
     -e GRADIO_SERVER_NAME=0.0.0.0 \
     -e GRADIO_SERVER_PORT=7860 \
     -v ./ktem_app_data:/app/ktem_app_data \
     -p 7860:7860 -it --rm \
     ghcr.io/cinnamon/kotaemon:main-full
     ```

   - To use the `full` version with bundled **Ollama** for _local / private RAG_.

     ```bash
     # change image name to
     docker run <...> ghcr.io/cinnamon/kotaemon:main-ollama
     ```

   - To use the `lite` version.

     ```bash
     # change image name to
     docker run <...> ghcr.io/cinnamon/kotaemon:main-lite
     ```

2. We currently support and test two platforms: `linux/amd64` and `linux/arm64` (for newer Mac). You can specify the platform by passing `--platform` in the `docker run` command. For example:

   ```bash
   # To run docker with platform linux/arm64
   docker run \
   -e GRADIO_SERVER_NAME=0.0.0.0 \
   -e GRADIO_SERVER_PORT=7860 \
   -v ./ktem_app_data:/app/ktem_app_data \
   -p 7860:7860 -it --rm \
   --platform linux/arm64 \
   ghcr.io/cinnamon/kotaemon:main-lite
   ```

3. Once everything is set up correctly, you can go to `http://localhost:7860/` to access the WebUI.

4. We use [GHCR](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry) to store docker images, all images can be found [here.](https://github.com/Cinnamon/kotaemon/pkgs/container/kotaemon)

### Without Docker

#### Option 1: Using uv (Recommended for faster installation)

1. Clone the repository and run the uv installation script:

   ```shell
   # clone this repo
   git clone https://github.com/Cinnamon/kotaemon
   cd kotaemon

   # run the uv installation script (installs uv automatically if not present)
   bash scripts/run_uv.sh
   ```

   This script will:

   - Install uv package manager if not present
   - Create a virtual environment with Python 3.10
   - Install all dependencies using uv (significantly faster than conda/pip)
   - Set up PDF.js viewer
   - Launch the application

#### Option 2: Using conda (Traditional method)

1. Clone and install required packages on a fresh python environment.

   ```shell
   # optional (setup env)
   conda create -n kotaemon python=3.10
   conda activate kotaemon

   # clone this repo
   git clone https://github.com/Cinnamon/kotaemon
   cd kotaemon

   pip install -e "libs/kotaemon[all]"
   pip install -e "libs/ktem"
   ```

2. Create a `.env` file in the root of this project. Use `.env.example` as a template

   The `.env` file is there to serve use cases where users want to pre-configure the models before starting up the app (e.g. deploying the app on HF hub). The file is only used to populate the db once, on the first run; it is no longer used in subsequent runs.

3. (Optional) To enable in-browser `PDF_JS` viewer, download [PDF_JS_DIST](https://github.com/mozilla/pdf.js/releases/download/v4.0.379/pdfjs-4.0.379-dist.zip) then extract it to `libs/ktem/ktem/assets/prebuilt`

<img src="https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/pdf-viewer-setup.png" alt="pdf-setup" width="300">

4. Start the web server:

   ```shell
   python app.py
   ```

   - The app will be automatically launched in your browser.
   - Default username and password are both `admin`. You can set up additional users directly through the UI.

   ![Chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)

5. Check the `Resources` tab and `LLMs and Embeddings` and ensure that your `api_key` value is set correctly from your `.env` file. If it is not set, you can set it there.

### Setup GraphRAG

> [!NOTE]
> Official MS GraphRAG indexing only works with OpenAI or Ollama API.
> We recommend that most users use the NanoGraphRAG implementation for straightforward integration with Kotaemon.

<details>

<summary>Setup Nano GRAPHRAG</summary>

- Install nano-GraphRAG: `pip install nano-graphrag`
- `nano-graphrag` install might introduce version conflicts, see [this issue](https://github.com/Cinnamon/kotaemon/issues/440)
  - To quickly fix: `pip uninstall hnswlib chroma-hnswlib && pip install chroma-hnswlib`
- Launch Kotaemon with `USE_NANO_GRAPHRAG=true` environment variable.
- Set your default LLM & Embedding models in Resources setting and it will be recognized automatically from NanoGraphRAG.

</details>

<details>

<summary>Setup LIGHTRAG</summary>

- Install LightRAG: `pip install git+https://github.com/HKUDS/LightRAG.git`
- `LightRAG` install might introduce version conflicts, see [this issue](https://github.com/Cinnamon/kotaemon/issues/440)
  - To quickly fix: `pip uninstall hnswlib chroma-hnswlib && pip install chroma-hnswlib`
- Launch Kotaemon with `USE_LIGHTRAG=true` environment variable.
- Set your default LLM & Embedding models in Resources setting and it will be recognized automatically from LightRAG.

</details>

<details>

<summary>Setup MS GRAPHRAG</summary>

- **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:

  ```shell
  pip install "graphrag<=0.3.6" future
  ```

- **Setting Up API KEY**: To use the GraphRAG retriever feature, ensure you set the `GRAPHRAG_API_KEY` environment variable. You can do this directly in your environment or by adding it to a `.env` file.
- **Using Local Models and Custom Settings**: If you want to use GraphRAG with local models (like `Ollama`) or customize the default LLM and other configurations, set the `USE_CUSTOMIZED_GRAPHRAG_SETTING` environment variable to true. Then, adjust your settings in the `settings.yaml.example` file.

</details>

### Setup Local Models (for local/private RAG)

See [Local model setup](docs/local_model.md).

### Setup multimodal document parsing (OCR, table parsing, figure extraction)

These options are available:

- [Azure Document Intelligence (API)](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence)
- [Adobe PDF Extract (API)](https://developer.adobe.com/document-services/docs/overview/pdf-extract-api/)
- [Docling (local, open-source)](https://github.com/DS4SD/docling)
  - To use Docling, first install required dependencies: `pip install docling`

Select corresponding loaders in `Settings -> Retrieval Settings -> File loader`

### Customize your application

- By default, all application data is stored in the `./ktem_app_data` folder. You can back up or copy this folder to transfer your installation to a new machine.

- For advanced users or specific use cases, you can customize these files:

  - `flowsettings.py`
  - `.env`

#### `flowsettings.py`

This file contains the configuration of your application. You can use the example
[here](flowsettings.py) as the starting point.

<details>

<summary>Notable settings</summary>

```python
# setup your preferred document store (with full-text search capabilities)
KH_DOCSTORE=(Elasticsearch | LanceDB | SimpleFileDocumentStore)

# setup your preferred vectorstore (for vector-based search)
KH_VECTORSTORE=(ChromaDB | LanceDB | InMemory | Milvus | Qdrant)

# Enable / disable multimodal QA
KH_REASONINGS_USE_MULTIMODAL=True

# Setup your new reasoning pipeline or modify existing one.
KH_REASONINGS = [
    "ktem.reasoning.simple.FullQAPipeline",
    "ktem.reasoning.simple.FullDecomposeQAPipeline",
    "ktem.reasoning.react.ReactAgentPipeline",
    "ktem.reasoning.rewoo.RewooAgentPipeline",
]
```

</details>

#### `.env`

This file provides another way to configure your models and credentials.

<details>

<summary>Configure model via the .env file</summary>

- Alternatively, you can configure the models via the `.env` file with the information needed to connect to the LLMs. This file is located in the folder of the application. If you don't see it, you can create one.

- Currently, the following providers are supported:

  - **OpenAI**

    In the `.env` file, set the `OPENAI_API_KEY` variable with your OpenAI API key in order
    to enable access to OpenAI's models. There are other variables that can be modified,
    please feel free to edit them to fit your case. Otherwise, the default parameter should
    work for most people.

    ```shell
    OPENAI_API_BASE=https://api.openai.com/v1
    OPENAI_API_KEY=<your OpenAI API key here>
    OPENAI_CHAT_MODEL=gpt-3.5-turbo
    OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
    ```

  - **Azure OpenAI**

    For OpenAI models via the Azure platform, you need to provide your Azure endpoint and API
    key. You might also need to provide your deployments' names for the chat model and the
    embedding model, depending on how you set up your Azure deployment.

    ```shell
    AZURE_OPENAI_ENDPOINT=
    AZURE_OPENAI_API_KEY=
    OPENAI_API_VERSION=2024-02-15-preview
    AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo
    AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
    ```

  - **Local Models**

    - Using `ollama` OpenAI compatible server:

      - Install [ollama](https://github.com/ollama/ollama) and start the application.

      - Pull your model, for example:

        ```shell
        ollama pull llama3.1:8b
        ollama pull nomic-embed-text
        ```

      - Set the model names on the web UI and make them the default:

        ![Models](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/models.png)

    - Using `GGUF` with `llama-cpp-python`

      You can search for and download an LLM to run locally from the [Hugging Face Hub](https://huggingface.co/models). Currently, these model formats are supported:

      - GGUF

        You should choose a model whose size is less than your device's memory and should leave
        about 2 GB. For example, if you have 16 GB of RAM in total, of which 12 GB is available,
        then you should choose a model that takes up at most 10 GB of RAM. Bigger models tend to
        give better generation but also take more processing time.

        Here are some recommendations and their size in memory:

      - [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true): around 2 GB

        Add a new LlamaCpp model with the provided model name on the web UI.

  </details>

### Adding your own RAG pipeline

#### Custom Reasoning Pipeline

1. Check the default pipeline implementation [here](libs/ktem/ktem/reasoning/simple.py). You can make quick adjustments to how the default QA pipeline works.
2. Add a new `.py` implementation in `libs/ktem/ktem/reasoning/` and later include it in `flowsettings.py` to enable it on the UI.

#### Custom Indexing Pipeline

- Check sample implementation in `libs/ktem/ktem/index/file/graph`

> (more instruction WIP).

<!-- end-intro -->

## Citation

Please cite this project as

```BibTeX
@misc{kotaemon2024,
    title = {Kotaemon - An open-source RAG-based tool for chatting with any content.},
    author = {The Kotaemon Team},
    year = {2024},
    howpublished = {\url{https://github.com/Cinnamon/kotaemon}},
}
```

## Star History

<a href="https://star-history.com/#Cinnamon/kotaemon&Date">
 <picture>
   <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=Cinnamon/kotaemon&type=Date&theme=dark" />
   <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=Cinnamon/kotaemon&type=Date" />
   <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=Cinnamon/kotaemon&type=Date" />
 </picture>
</a>

## Contribution

Since our project is actively being developed, we greatly value your feedback and contributions. Please see our [Contributing Guide](https://github.com/Cinnamon/kotaemon/blob/main/CONTRIBUTING.md) to get started. Thank you to all our contributors!

<a href="https://github.com/Cinnamon/kotaemon/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=Cinnamon/kotaemon" />
</a>


================================================
FILE: app.py
================================================
import os

from theflow.settings import settings as flowsettings

# Application settings come from flowsettings; each one falls back to a
# default when the attribute is not defined there.
KH_APP_DATA_DIR = getattr(flowsettings, "KH_APP_DATA_DIR", ".")
KH_GRADIO_SHARE = getattr(flowsettings, "KH_GRADIO_SHARE", False)

# Respect a GRADIO_TEMP_DIR that is already present in the environment;
# otherwise point it at a "gradio_tmp" folder inside the app data directory
# and export that choice through os.environ.
if "GRADIO_TEMP_DIR" not in os.environ:
    os.environ["GRADIO_TEMP_DIR"] = os.path.join(KH_APP_DATA_DIR, "gradio_tmp")
GRADIO_TEMP_DIR = os.environ["GRADIO_TEMP_DIR"]


# NOTE(review): imported only after GRADIO_TEMP_DIR has been exported,
# presumably so that ktem/gradio observe the final value at import time.
# Keep this ordering (hence the noqa for the late import).
from ktem.main import App  # noqa

app = App()
demo = app.make()

# Enable queuing first, then start the server on the queued app.
queued_demo = demo.queue()
queued_demo.launch(
    favicon_path=app._favicon,
    inbrowser=True,
    # Passed through as launch()'s allowed_paths: the bundled UI assets and
    # the temp directory resolved above.
    allowed_paths=["libs/ktem/ktem/assets", GRADIO_TEMP_DIR],
    share=KH_GRADIO_SHARE,
)


================================================
FILE: doc_env_reqs.txt
================================================
mkdocs
mkdocstrings[python]
mkdocs-material
mkdocs-gen-files
mkdocs-literate-nav
mkdocs-git-revision-date-localized-plugin
mkdocs-section-index
mkdocs-include-markdown-plugin[cache]
mdx_truly_sane_lists


================================================
FILE: docs/about.md
================================================
# About Kotaemon

An open-source tool for chatting with your documents. Built with both end users and
developers in mind.

[Source Code](https://github.com/Cinnamon/kotaemon) |
[HF Space](https://huggingface.co/spaces/cin-model/kotaemon-demo)

[Installation Guide](https://cinnamon.github.io/kotaemon/) |
[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |
[Feedback](https://github.com/Cinnamon/kotaemon/issues)


================================================
FILE: docs/development/contributing.md
================================================
# Contributing

## Setting up

- Clone the repo

  ```shell
  git clone git@github.com:Cinnamon/kotaemon.git
  cd kotaemon
  ```

- Install the environment

  - Create a conda environment (python >= 3.10 is recommended)

    ```shell
    conda create -n kotaemon python=3.10
    conda activate kotaemon

    # install dependencies
    cd libs/kotaemon
    pip install -e ".[all]"
    ```

  - Or run the installer (one of the `scripts/run_*` scripts depends on your OS), then
    you will have all the dependencies installed as a conda environment at
    `install_dir/env`.

    ```shell
    conda activate install_dir/env
    ```

- Pre-commit

  ```shell
  pre-commit install
  ```

- Test

  ```shell
  pytest tests
  ```

## Package overview

`kotaemon` library focuses on the AI building blocks to implement a RAG-based QA application. It consists of base interfaces, core components and a list of utilities:

- Base interfaces: `kotaemon` defines the base interface of a component in a pipeline. A pipeline is also a component. By clearly defining this interface, a pipeline of steps can be easily constructed and orchestrated.
- Core components: `kotaemon` implements (or wraps 3rd-party libraries
  like Langchain, llama-index,... when possible) commonly used components in
  kotaemon use cases. Some of these components are: LLM, vector store,
  document store, retriever... For a detailed list and description of these
  components, please refer to the [API Reference](../reference/Summary.md) section.
- List of utilities: `kotaemon` provides utilities and tools that are
  usually needed in client project. For example, it provides a prompt
  engineering UI for AI developers in a project to quickly create a prompt
  engineering tool for DMs and QALs. It also provides a command to quickly spin
  up a project code base. For a full list and description of these utilities,
  please refer to the [Utilities](utilities.md) section.

```mermaid
mindmap
  root((kotaemon))
    Base Interfaces
      Document
      LLMInterface
      RetrievedDocument
      BaseEmbeddings
      BaseChat
      BaseCompletion
      ...
    Core Components
      LLMs
        AzureOpenAI
        OpenAI
      Embeddings
        AzureOpenAI
        OpenAI
        HuggingFaceEmbedding
      VectorStore
        InMemoryVectorstore
        ChromaVectorstore
      Agent
      Tool
      DocumentStore
      ...
    Utilities
      Scaffold project
      PromptUI
      Documentation Support
```

## Common conventions

- PR title: One-line description (example: Feat: Declare BaseComponent and decide LLM call interface).
- [Encouraged] Provide a quick description in the PR, so that:
  - Reviewers can quickly understand the direction of the PR.
  - It will be included in the commit message when the PR is merged.

## Environment caching on PR

- To speed up CI, environments are cached based on the version specified in `__init__.py`.
- Since dependency versions in `setup.py` are not pinned, you need to bump the version in order to use a new environment. That environment will then be cached and used by your subsequent commits within the PR, until you bump the version again.
- The new environment created during your PR is cached and will be available to others once the PR is merged.
- If you are experimenting with new dependencies and want a fresh environment every time, add `[ignore cache]` in your commit message. The CI will create a fresh environment to run your commit and then discard it.
- If your PR includes updated dependencies, the recommended workflow would be:
  - Doing development as usual.
  - When you want to run the CI, push a commit with the message containing `[ignore cache]`.
  - Once the PR is final, bump the version in `__init__.py` and push a final commit not containing `[ignore cache]`.

## Merge PR guideline

- Use squash and merge option
- 1st line message is the PR title.
- The text area is the PR description.


================================================
FILE: docs/development/create-a-component.md
================================================
# Creating a component

A fundamental concept in kotaemon is "component".

Anything that isn't data or data structure is a "component". A component can be
thought of as a step within a pipeline. It takes in some input, processes it,
and returns an output, just the same as a Python function! The output will then
become an input for the next component in a pipeline. In fact, a pipeline is just
a component. More appropriately, a nested component: a component that makes use of one or more other components in
the processing step. So in reality, there isn't a difference between a pipeline
and a component! Because of that, in kotaemon, we will consider them the
same as "component".

To define a component, you will:

1. Create a class that subclasses from `kotaemon.base.BaseComponent`
2. Declare init params with type annotation
3. Declare nodes (nodes are just other components!) with type annotation
4. Implement the processing logic in `run`.

The syntax of a component is as follows:

```python
from kotaemon.base import BaseComponent
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.parsers import RegexExtractor


class FancyPipeline(BaseComponent):
    param1: str = "This is param1"
    param2: int = 10
    param3: float

    node1: BaseComponent    # this is a node because of BaseComponent type annotation
    node2: LCAzureChatOpenAI  # this is also a node because LCAzureChatOpenAI subclasses BaseComponent
    node3: RegexExtractor   # this is also a node because RegexExtractor subclasses BaseComponent

    def run(self, some_text: str):
        prompt = (self.param1 + some_text) * int(self.param2 + self.param3)
        llm_pred = self.node2(prompt).text
        matches = self.node3(llm_pred)
        return matches
```

Then this component can be used as follows:

```python
llm = LCAzureChatOpenAI(endpoint="some-endpoint")
extractor = RegexExtractor(pattern=["yes", "Yes"])

component = FancyPipeline(
    param1="Hello",
    param3=1.5,
    node1=llm,
    node2=llm,
    node3=extractor
)
component("goodbye")
```

This way, we can define each operation as a reusable component, and use them to
compose larger reusable components!

## Benefits of component

By defining a component as above, we formally encapsulate all the necessary
information inside a single class. This introduces several benefits:

1. Allow tools like promptui to inspect the inner working of a component in
   order to automatically generate the promptui.
2. Allow visualizing a pipeline for debugging purpose.


================================================
FILE: docs/development/data-components.md
================================================
# Data & Data Structure Components

The data & data structure components include:

- The `Document` class.
- The document store.
- The vector store.

## Data Loader

- PdfLoader
- Layout-aware with table parsing PdfLoader

  - MathPixLoader: To use this loader, you need MathPix API key, refer to [mathpix docs](https://docs.mathpix.com/#introduction) for more information
  - OCRLoader: This loader uses lib-table and Flax pipeline to perform OCR and read table structure from PDF file (TODO: add more info about deployment of this module).
  - Output:

    - Document: text + metadata to identify whether it is table or not

      ```
      - "source": source file name
      - "type": "table" or "text"
      - "table_origin": original table in markdown format (to be fed to an LLM or visualized using external tools)
      - "page_label": page number in the original PDF document
      ```

## Document Store

- InMemoryDocumentStore

## Vector Store

- ChromaVectorStore
- InMemoryVectorStore


================================================
FILE: docs/development/index.md
================================================
{%
    include-markdown "../../README.md"
    start="<!-- start-intro -->"
    end="<!-- end-intro -->"
%}


================================================
FILE: docs/development/utilities.md
================================================
# Utilities

## Prompt engineering UI

![chat-ui](images/271332562-ac8f9aac-d853-4571-a48b-d866a99eaf3e.png)

**_Important:_** despite the name prompt engineering UI, this tool allows testers to test any kind of parameters that are exposed by developers. Prompt is one kind of param. There can be other types of params that testers can tweak (e.g. top_k, temperature...).

In the development process, developers typically build the pipeline. However, for use
cases requiring expertise in prompt creation, non-technical members (testers, domain experts) can be more
effective. To facilitate this, `kotaemon` offers a user-friendly prompt engineering UI
that developers integrate into their pipelines. This enables non-technical members to
adjust prompts and parameters, run experiments, and export results for optimization.

As of Sept 2023, there are 2 kinds of prompt engineering UI:

- Simple pipeline: run one-way from start to finish.
- Chat pipeline: interactive back-and-forth.

### Simple pipeline

For a simple pipeline, the supported client project workflow looks as follows:

1. [tech] Build pipeline
2. [tech] Export pipeline to config: `$ kotaemon promptui export <module.path.pipelineclass> --output <path/to/config/file.yml>`
3. [tech] Customize the config
4. [tech] Spin up prompt engineering UI: `$ kotaemon promptui run <path/to/config/file.yml>`
5. [non-tech] Change params, run inference
6. [non-tech] Export to Excel
7. [non-tech] Select the set of params that achieve the best output

The prompt engineering UI is primarily involved in steps 2 to 7 (step 1 is normally
done by the developers, while step 7 happens exclusively in the Excel file).

#### Step 2 - Export pipeline to config

Command:

```shell
$ kotaemon promptui export <module.path.pipelineclass> --output <path/to/config/file.yml>
```

where:

- `<module.path.pipelineclass>` is a dot-separated path to the pipeline. For example, if your pipeline can be accessed with `from projectA.pipelines import AnsweringPipeline`, then this value is `projectA.pipelines.AnsweringPipeline`.
- `<path/to/config/file.yml>` is the target file path that the config will be exported to. If the config file already exists, and contains information of other pipelines, the config of current pipeline will additionally be added. If it contains information of the current pipeline (in the past), the old information will be replaced.

By default, all params in a pipeline (including nested params) will be exported to the configuration file. For params that you do not wish to expose to the UI, you can directly remove them from the config YAML file. You can also annotate those params with `ignore_ui=True`, and they will be ignored in the config generation process. Example:

```python
class Pipeline(BaseComponent):
    param1: str = Param(default="hello")
    param2: str = Param(default="goodbye", ignore_ui=True)
```

Declared as above, `param1` will show up in the config YAML file, while `param2` will not.

#### Step 3 - Customize the config

Developers can further edit the config file in this step to get the most suitable UI (step 4) for their tasks. The exported config will have this overall schema:

```yml
<module.path.pipelineclass1>:
  params: ... (Detail param information to initiate a pipeline. This corresponds to the pipeline init parameters.)
  inputs: ... (Detail the input of the pipeline e.g. a text prompt. This corresponds to the params of `run(...)` method.)
  outputs: ... (Detail the output of the pipeline e.g. prediction, accuracy... This is the output information we wish to see in the UI.)
  logs: ... (Detail what information should show up in the log.)
```

##### Input and params

The inputs section has the overall schema as follows:

```yml
inputs:
  <input-variable-name-1>:
    component: <supported-UI-component>
    params: # (this section is optional)
      value: <default-value>
  <input-variable-name-2>: ... # similar to above
params:
  <param-variable-name-1>: ... # similar to those in the inputs
```

The list of supported prompt UI and their corresponding gradio UI components:

```python
COMPONENTS_CLASS = {
    "text": gr.components.Textbox,
    "checkbox": gr.components.CheckboxGroup,
    "dropdown": gr.components.Dropdown,
    "file": gr.components.File,
    "image": gr.components.Image,
    "number": gr.components.Number,
    "radio": gr.components.Radio,
    "slider": gr.components.Slider,
}
```

##### Outputs

The outputs are a list of variables that we wish to show in the UI. Since function outputs in Python don't have variable names, the output declaration is a little bit different from the input and param declarations:

```yml
outputs:
  - component: <supported-UI-component>
    step: <name-of-pipeline-step>
    item: <jsonpath way to retrieve the info>
  - ... # similar to above
```

where:

- component: the same text string and corresponding Gradio UI as in inputs & params
- step: the pipeline step whose output we wish to fetch and show on the UI
- item: the jsonpath mechanism to get the targeted variable from the step above

##### Logs

The logs show a list of sheetname and how to retrieve the desired information.

```yml
logs:
  <logname>:
    inputs:
      - name: <column name>
        step: <the pipeline step that we would wish to see the input>
        variable: <the variable in the step>
      - ...
    outputs:
      - name: <column name>
        step: <the pipeline step that we would wish to see the output>
        item: <how to retrieve the output of that step>
```

#### Step 4 + 5 - Spin up prompt engineering UI + Perform prompt engineering

Command:

```shell
$ kotaemon promptui run <path/to/config/file.yml>
```

This will generate a UI as follows:

![Screenshot from 2023-09-20 12-20-31](images/269170198-9ac1b95a-b667-42e7-b318-98a1b805d6df.png)

where:

- The tabs at the top of the UI correspond to the pipelines available for prompt engineering.
- The inputs and params tabs allow users to edit (these correspond to the inputs and params in the config file).
- The outputs panel holds the UI elements to show the outputs defined in config file.
- The Run button: will execute pipeline with the supplied inputs and params, and render result in the outputs panel.
- The Export button: will export the logs of all the runs to an Excel file for users to inspect for the best set of params.

#### Step 6 - Export to Excel

Upon clicking export, the users can download an Excel file.

### Chat pipeline

The chat pipeline workflow is different from the simple pipeline workflow. In a simple pipeline, each Run creates a set of output, input and params for users to compare. In a chat pipeline, each Run is not a one-off run, but a long interactive session. Hence, the workflow is as follows:

1. Set the desired parameters.
2. Click "New chat" to start a chat session with the supplied parameters. This set of parameters will persist until the end of the chat session. During an ongoing chat session, changing the parameters will not take any effect.
3. Chat and interact with the chat bot on the right panel. You can add any additional input (if any), and they will be supplied to the chatbot.
4. During chat, the log of the chat will show up in the "Output" tabs. This is empty by default, so if you want to show the log here, tell the AI developers to configure the UI settings.
5. When finishing chat, select your preference in the radio box. Click "End chat". This will save the chat log and the preference to disk.
6. To compare the results of different runs, click "Export" to get an Excel spreadsheet summary of the different runs.


================================================
FILE: docs/extra/css/code_select.css
================================================
.language-pycon .gp,
.language-pycon .go {
  /* Generic.Prompt,  Generic.Output */
  user-select: none;
}


================================================
FILE: docs/index.md
================================================
# Getting Started with Kotaemon

![type:video](https://github.com/Cinnamon/kotaemon/assets/25688648/815ecf68-3a02-4914-a0dd-3f8ec7e75cd9)

This page is intended for **end users** who want to use the `kotaemon` tool for Question
Answering on local documents. If you are a **developer** who wants to contribute to the project, please visit the [development](development/index.md) page.

## Installation (Online HuggingFace Space) - easy (10 mins)

Visit this [guide](online_install.md).

## Installation (Offline) - intermediate (20 mins)

### Download

Download the `kotaemon-app.zip` file from the [latest release](https://github.com/Cinnamon/kotaemon/releases/latest/).

### Run setup script

0. Unzip the downloaded file.
1. Navigate to the `scripts` folder and start an installer that matches your OS:
   - Windows: `run_windows.bat`. Just double click the file.
   - macOS: `run_macos.sh`
     1. Right click on your file and select Open with and Other.
     2. Enable All Applications and choose Terminal.
     3. NOTE: If you always want to open that file with Terminal, then check Always Open With.
     4. From now on, double click on your file and it should work.
   - Linux: `run_linux.sh`. Please run the script using `bash run_linux.sh` in your terminal.
2. After the installation, the installer will ask whether to launch ktem's UI; answer to continue.
3. If launched, the application will be opened automatically in your browser.
4. Default login information is: `username: admin / password: admin`. You should change this credential right after the first login on the UI.

## Launch

To launch the app after initial setup or any change, simply run the `run_*` script again.

A browser window will be opened and greet you with this screen:

![Chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)

## Usage

For how to use the application, see [Usage](usage.md). This page will also be available to
you within the application.

## Feedback

Feel free to create a bug report or a feature request on our [repo](https://github.com/Cinnamon/kotaemon/issues).


================================================
FILE: docs/local_model.md
================================================
# Setup local LLMs & Embedding models

## Prepare local models

#### NOTE

In the case of using Docker image, please replace `http://localhost` with `http://host.docker.internal` to correctly communicate with service on the host machine. See [more detail](https://stackoverflow.com/questions/31324981/how-to-access-host-port-from-docker-container).

### Ollama OpenAI compatible server (recommended)

Install [ollama](https://github.com/ollama/ollama) and start the application.

Pull your model (e.g):

```
ollama pull llama3.1:8b
ollama pull nomic-embed-text
```

Setup LLM and Embedding model on Resources tab with type OpenAI. Set these model parameters to connect to Ollama:

```
api_key: ollama
base_url: http://localhost:11434/v1/
model: llama3.1:8b (for llm) | nomic-embed-text (for embedding)
```

![Models](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/models.png)

### oobabooga/text-generation-webui OpenAI compatible server

Install [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui/).

Follow the setup guide to download your models (GGUF, HF).
Also take a look at [OpenAI compatible server](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API) for detailed instructions.

Here is a short version

```
# install sentence-transformer for embeddings creation
pip install sentence_transformers
# change to text-generation-webui src dir
python server.py --api
```

Use the `Models` tab to download new model and press Load.

Setup LLM and Embedding model on Resources tab with type OpenAI. Set these model parameters to connect to `text-generation-webui`:

```
api_key: dummy
base_url: http://localhost:5000/v1/
model: any
```

### llama-cpp-python server (LLM only)

See [llama-cpp-python OpenAI server](https://llama-cpp-python.readthedocs.io/en/latest/server/).

Download any GGUF model weight on HuggingFace or other source. Place it somewhere on your local machine.

Run

```
LOCAL_MODEL=<path/to/GGUF> python scripts/serve_local.py
```

Setup LLM model on Resources tab with type OpenAI. Set these model parameters to connect to `llama-cpp-python`:

```
api_key: dummy
base_url: http://localhost:8000/v1/
model: model_name
```

## Use local models for RAG

- Set default LLM and Embedding model to a local variant.

![Models](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/llm-default.png)

- Set embedding model for the File Collection to a local model (e.g: `ollama`)

![Index](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/index-embedding.png)

- Go to Retrieval settings and choose LLM relevant scoring model as a local model (e.g: `ollama`). Or, you can choose to disable this feature if your machine cannot handle a lot of parallel LLM requests at the same time.

![Settings](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/retrieval-setting.png)

You are set! Start a new conversation to test your local RAG pipeline.


================================================
FILE: docs/online_install.md
================================================
## Installation (Online HuggingFace Space)

1. Go to [HF kotaemon_template](https://huggingface.co/spaces/cin-model/kotaemon_template).
2. Use Duplicate function to create your own space. Or use this [direct link](https://huggingface.co/spaces/cin-model/kotaemon_template?duplicate=true).
   ![Duplicate space](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/duplicate_space.png)
   ![Change space params](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/change_space_params.png)
3. Wait for the build to complete and start up (approx. 10 mins).
   ![Wait space build](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/space_build.png)
   ![Close space build](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/close_logs_space.png)
4. Follow the first setup instructions (and register for Cohere API key if needed).
   ![Cohere API](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/cohere_api_key.png)
5. Complete the setup and use your own private space!
   ![App Startup](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/initial_startup.png)


================================================
FILE: docs/pages/app/customize-flows.md
================================================
# Add new indexing and reasoning pipeline to the application

@trducng

At high level, to add new indexing and reasoning pipeline:

1. You define your indexing or reasoning pipeline as a class from
   `BaseComponent`.
2. You declare that class in the setting files `flowsettings.py`.

Then when you run `python app.py`, the application will dynamically load those
pipelines.

The below sections talk in more detail about how the pipelines should be
constructed.

## Define a pipeline as a class

In essence, a pipeline will subclass from `kotaemon.base.BaseComponent`.
Each pipeline has 2 main parts:

- All declared arguments and sub-pipelines.
- The logic inside the pipeline.

An example pipeline:

```python
from kotaemon.base import BaseComponent


class SoSimple(BaseComponent):
    arg1: int
    arg2: str

    def run(self, arg3: str):
        return self.arg1 * self.arg2 + arg3
```

This pipeline is simple for demonstration purpose, but we can imagine pipelines
with much more arguments, that can take other pipelines as arguments, and have
more complicated logic in the `run` method.

**_An indexing or reasoning pipeline is just a class that subclasses
`BaseComponent` like above._**

For more detail on this topic, please refer to [Creating a
Component](/create-a-component/)

## Run signatures

**Note**: this section is tentative at the moment. We will finalize the `def run`
function signature by early April at the latest.

The indexing pipeline:

```python
    def run(
        self,
        file_paths: str | Path | list[str | Path],
        reindex: bool = False,
        **kwargs,
    ):
        """Index files to intermediate representation (e.g. vector, database...)

        Args:
            file_paths: the list of paths to files
            reindex: if True, files in `file_paths` that already exist in the database
                should be reindexed.
        """
```

The reasoning pipeline:

```python
    def run(self, question: str, history: list, **kwargs) -> Document:
        """Answer the question

        Args:
            question: the user input
            history: the chat history [(user_msg1, bot_msg1), (user_msg2, bot_msg2)...]

        Returns:
            kotaemon.base.Document: the final answer
        """
```

## Register your pipeline to ktem

To register your pipelines to ktem, you declare it in the `flowsettings.py`
file. This file is located in the current working directory where you start the
ktem. In most use cases, it is this
[one](https://github.com/Cinnamon/kotaemon/blob/main/flowsettings.py).

```python
KH_REASONING = ["<python.module.path.to.the.reasoning.class>"]

KH_INDEX = "<python.module.path.to.the.indexing.class>"
```

You can register multiple reasoning pipelines to ktem by populating the
`KH_REASONING` list. The user can select which reasoning pipeline to use
in their Settings page.

For now, there's only one supported index option for `KH_INDEX`.

Make sure that your class is discoverable by Python.

## Allow users to customize your pipeline in the app settings

To allow the users to configure your pipeline, you need to declare what you
allow the users to configure as a dictionary. `ktem` will include them into the
application settings.

In your pipeline class, add a classmethod `get_user_settings` that returns a
setting dictionary, add a classmethod `get_info` that returns an info
dictionary. Example:

```python
class SoSimple(BaseComponent):

    ... # as above

    @classmethod
    def get_user_settings(cls) -> dict:
        """The settings to the user"""
        return {
            "setting_1": {
                "name": "Human-friendly name",
                "value": "Default value",
                "choices": [("Human-friendly Choice 1", "choice1-id"), ("HFC 2", "choice2-id")], # optional
                "component": "Which Gradio UI component to render, can be: text, number, checkbox, dropdown, radio, checkboxgroup"
            },
            "setting_2": {
                # follow the same rule as above
            }
        }

    @classmethod
    def get_info(cls) -> dict:
        """Pipeline information for bookkeeping purpose"""
        return {
            "id": "a unique id to differentiate this pipeline from other pipeline",
            "name": "Human-friendly name of the pipeline",
            "description": "Can be a short description of this pipeline"
        }
```

Once adding these methods to your pipeline class, `ktem` will automatically
extract and add them to the settings.

## Construct to pipeline object

Once `ktem` runs your pipeline, it will call your classmethod `get_pipeline`
with the full user settings and expect to obtain the pipeline object. Within
this `get_pipeline` method, you implement all the necessary logics to initiate
the pipeline object. Example:

```python
class SoSimple(BaseComponent):
    ... # as above

    @classmethod
    def get_pipeline(cls, setting):
        obj = cls(arg1=setting["reasoning.id.setting1"])
        return obj
```

## Reasoning: Stream output to UI

For a fast user experience, you can stream the output directly to the UI. This way,
the user can start observing the output as soon as the LLM generates the 1st
token, rather than having to wait for the pipeline to finish before reading the whole message.

To stream the output, you need to:

1. Turn the `run` function to async.
2. Pass in the output to a special queue with `self.report_output`.

```python

    async def run(self, question: str, history: list, **kwargs) -> Document:
        for char in "This is a long message":
            self.report_output({"output": char})
```

The argument to `self.report_output` is a dictionary that contains either or
both of these 2 keys: "output", "evidence". The "output" string will be streamed
to the chat message, and the "evidence" string will be streamed to the
information panel.

## Access application LLMs, Embeddings

You can access users' collections of LLMs and embedding models with:

```python
from ktem.embeddings.manager import embeddings
from ktem.llms.manager import llms


llm = llms.get_default()
embedding_model = embeddings.get_default()
```

You can also allow the users to specifically select which llms or embedding
models they want to use through the settings.

```python
    @classmethod
    def get_user_settings(cls) -> dict:
        from ktem.llms.manager import llms

        return {
            "citation_llm": {
                "name": "LLM for citation",
                "value": llms.get_default(),
                "component": "dropdown",
                "choices": list(llms.options().keys()),
            },
            ...
        }
```

## Optional: Access application data

You can access the user's application database, vector store as follows:

```python
# get the database that contains the source files
from ktem.db.models import Source, Index, Conversation, User

# get the vector store
```


================================================
FILE: docs/pages/app/ext/user-management.md
================================================
`ktem` provides user management as an extension. To enable user management, in
your `flowsettings.py`, set the following variables:

- `KH_FEATURE_USER_MANAGEMENT`: True to enable.
- `KH_FEATURE_USER_MANAGEMENT_ADMIN`: the admin username. This user will be
  created when the app first starts.
- `KH_FEATURE_USER_MANAGEMENT_PASSWORD`: the admin password. This value
  accompanies the admin username.

Once enabled, you have access to the following features:

- User login/logout (located in Settings Tab)
- User changing password (located in Settings Tab)
- Create / List / Edit / Delete user (located in Resources > Users Tab)


================================================
FILE: docs/pages/app/features.md
================================================
## Chat

Kotaemon focuses on question answering over a corpus of data. Below
is a gentle introduction to the chat functionality.

- Users can upload corpus of files.
- Users can converse with the chatbot to ask questions about the corpus of files.
- Users can view the reference in the files.


================================================
FILE: docs/pages/app/functional-description.md
================================================
## User group / tenant management

### Create new user group

(6 man-days)

**Description**: each client has a dedicated user group. Each user group has an
admin user who can do administrative tasks (e.g. creating user account in that
user group...). The workflow for creating a new user group is as follows:

1. Cinnamon accesses the user group management UI.
2. On "Create user group" panel, we supply:
   a. Client name: e.g. Apple.
   b. Sub-domain name: e.g. apple.
   c. Admin email, username & password.
3. The system will:
   a. Create an Aurora Platform deployment with the specified sub-domain.
   b. Send an email to the admin, with the username & password.

**Expectation**:

- The admin can go to the deployed Aurora Platform.
- The admin can login with the specified username & password.

**Condition**:

- When sub-domain name already exists, raise error.
- If error sending email to the client, raise the error, and delete the
  newly-created user-group.
- Password rule:
  - Have at least 8 characters.
  - Must contain uppercase, lowercase, number and symbols.

---

### Delete user group

(2 man-days)

**Description**: in the tenant management page, we can delete the selected user
group. The user flow is as follows:

1. Cinnamon accesses the user group management UI.
2. View list of user groups.
3. Next to target user group, click delete.
4. Confirm whether to delete.
5. If Yes, delete the user group. If No, cancel the operation.

**Expectation**: when a user group is deleted, we expect to delete everything
related to the user groups: domain, files, databases, caches, deployments.

## User management

---

### Create user account (for admin user)

(1 man-day)

**Description**: the admin user in the client's account can create user accounts
for that user group. To create a new user, the client admin does the following:

1. Navigate to "Admin" > "Users"
2. In the "Create user" panel, supply:
   - Username
   - Password
   - Confirm password
3. Click "Create"

**Expectation**:

- The user can create the account.
- The username:
  - Is case-insensitive (e.g. Moon and moon will be the same)
  - Can only contain these characters: a-z A-Z 0-9 \_ + - .
  - Has maximum length of 32 characters
- The password is subjected to the following rule:
  - 8-character minimum length
  - Contains at least 1 number
  - Contains at least 1 lowercase letter
  - Contains at least 1 uppercase letter
  - Contains at least 1 special character from the following set, or a
    non-leading, non-trailing space character: `` ^ $ * . [ ] { } ( ) ? - " ! @ # % & / \ , > < ' : ; | _ ~ ` + = ``

---

### Delete user account (for admin user)

**Description**: the admin user in the client's account can delete user account.
Once a user account is deleted, he/she cannot login to Aurora Platform.

1. The admin user navigates to "Admin" > "Users".
2. In the user list panel, next to the username, the admin clicks on the "Delete"
   button. The Confirmation dialog appears.
3. If "Delete", the user account is deleted. If "Cancel", do nothing. The
   Confirmation dialog disappears.

**Expectation**:

- Once the user is deleted, the following information relating to the user will
  be deleted:
  - His/her personal setting.
  - His/her conversations.
- The following information relating to the user will still be retained:
  - His/her uploaded files.

---

### Edit user account (for admin user)

**Description**: the admin user can change any information about the user
account, including password. To change user information:

1. The admin user navigates to "Admin" > "Users".
2. In the user list panel, next to the username, the admin clicks on the "Edit"
   button.
3. The user list disappears and the user detail appears, showing the following
   information:
   - Username: (prefilled the username)
   - Password: (blank)
   - Confirm password: (blank)
4. The admin can edit any of the information, and click "Save" or "Cancel".
   - If "Save": the information will be updated to the database, or show
     error per Expectation below.
   - If "Cancel": skip.
5. If Save success or Cancel, transfer back to the user list UI, where the user
   information is updated accordingly.

**Expectation**:

- If the "Password" & "Confirm password" are different from each other, show
  error: "Password mismatch".
- If both "Password" & "Confirm password" are blank, don't change the user
  password.
- If changing password, the new password is subject to the same rules as when
  creating a user.
- It's possible to change username. If changing username, the target user has to
  use the new username.

---

### Sign-in

(3 man-days)

**Description**: the users can sign-in to Aurora Platform as follows:

1. User navigates to the URL.
2. If the user is not logged in, the UI just shows the login screen.
3. User types username & password.
4. If correct, the user will proceed to normal working UI.
5. If incorrect, the login screen shows text error.

---

### Sign-out

(1 man-day)

**Description**: the user can sign-out of Aurora Platform as follows:

1. User navigates to the Settings > User page.
2. User clicks on logout.
3. The user is signed out to the UI login screen.

**Expectation**: the user is completely signed out. Next time he/she uses the
Aurora Platform, he/she has to login again.

---

### Change password

**Description**: the user can change their password as follows:

1. User navigates to the Settings > User page.
2. In the change password section, the user provides the following info and clicks
   Change:
   - Current password
   - New password
   - Confirm new password
3. If changing successfully, then the password is changed. Otherwise, show the
   error on the UI.

**Expectation**:

- If changing password succeeds, next time they logout/login to the system, they
  can use the new password.
- Password rule (Same as normal password rule when creating user)
- Errors:
  - Password does not match.
  - Violated password rules.

---

## Chat

### Chat to the bot

**Description**: the Aurora Platform focuses on question and answering over the
uploaded data. Each chat has the following components:

- Chat message: show the exchange between bots and humans.
- Text input + send button: for the user to input the message.
- Data source panel: for selecting the files that will scope the context for the
  bot.
- Information panel: showing evidence as the bot answers user's questions.

The chat workflow is as follows:

1. [Optional] User selects the files that will scope the context for the bot.
   If the user doesn't select any files, then all files on Aurora Platform will
   be the context for the bot.
   - The user can type multi-line messages, using "Shift + Enter" for
     line-break.
2. User sends the message (either clicking the Send button or hitting the Enter
   key).
3. The bot in the chat conversation will return "Thinking..." while it
   processes.
4. The information panel on the right begins to show data related to the user
   message.
5. The bot begins to generate the answer. The "Thinking..." placeholder disappears.

**Expectation**:

- Messages:
  - User can send multi-line messages, using "Shift + Enter" for line-break.
  - User can thumbs up, thumbs down the AI response. This information is
    recorded in the database.
  - User can click on a copy button on the chat message to copy the content to
    clipboard.
- Information panel:
  - The information panel shows the latest evidence.
  - The user can click on the message, and the reference for that message will
    show up on the "Reference panel" (feature in-planning).
  - The user can click on the title to show/hide the content.
  - The whole information panel can be collapsed.
- Chatbot quality:
  - The user can converse with the bot. The bot answers the user's requests in a
    natural manner.
  - The bot message should be streamed to the UI. The bot doesn't wait to gather
    the whole text response and then dump it all at once.

### Conversation - switch

**Description**: users can jump around between different conversations. They can
see the list of all conversations, select an old conversation, and continue
the chat under the context of the old conversation. The switching workflow is
like this:

1. Users click on the conversation dropdown. It will show a list of
   conversations.
2. Within that dropdown, the user selects one conversation.
3. The chat messages, information panel, and selected data will show the content
   in that old chat.
4. The user can continue chatting as normal under the context of this old chat.

**Expectation**:

- In the conversation dropdown list, the conversations are ordered by creation
  date.
- When there is no conversation, the conversation list is empty.
- When there is no conversation, the user can still converse with the chat bot.
  When doing so, a new conversation is automatically created.

### Conversation - create

**Description**: the user can explicitly start a new conversation with the
chatbot:

1. User clicks on the "New" button.
2. The new conversation is automatically created.

**Expectation**:

- The default conversation name is the current datetime.
- It becomes selected.
- It is added to the conversation list.

### Conversation - rename

**Description**: user can rename the conversation by typing the new name and
clicking on the Rename button next to it.

- If rename succeeds: the name shown in the 1st dropdown will change accordingly.
- If rename doesn't succeed: show error message in red color below the rename section.

**Condition**:

- Name constraint:
  - Min characters: 1
  - Max characters: 40
  - Cannot have the same name as an existing conversation of the same
    user.

### Conversation - delete

**Description**: user can delete the existing conversation as follows:

1. Click on Delete button.
2. The UI shows a confirmation with 2 buttons:
   - Delete
   - Cancel.
3. If Delete, delete the conversation, switch to the next oldest conversation,
   close the confirmation panel.
4. If cancel, just close the confirmation panel.

## File management

The file management allows users to upload, list and delete files that they
upload to the Aurora Platform.

### Upload file

**Description**: the user can upload files to the Aurora Platform. The uploaded
files will be served as context for our chatbot to refer to when it converses
with the user. To upload file, the user:

1. Navigate to the File tab.
2. Within the File tab, there is an Upload section.
3. User can add files to the Upload section through drag & drop, or by clicking
   on the file browser.
4. User can select some options relating to uploading and indexing. Depending on
   the project, these options can be different. Nevertheless, they will be
   discussed below.
5. User clicks on "Upload and Index" button.
6. The app shows notifications on the top right corner when indexing starts and
   finishes, and when errors happen.

**Options**:

- Force re-index file. When user tries to upload files that already exist on
  the system:
  - If this option is True: will re-index those files.
  - If this option is False: will skip indexing those files.

**Condition**:

- Max number of files: 100 files.
- Max number of pages per file: 500 pages
- Max file size: 10 MB

### List all files

**Description**: the user can know which files are on the system by:

1. Navigate to the File tab.
2. By default, it will show all the uploaded files, each with the following
   information: file name, file size, number of pages, uploaded date
3. The UI also shows the total number of pages, and the total size in MB.

### Delete file

**Description**: users can delete files from this UI to free up the space, or to
remove outdated information. To remove the files:

1. User navigates to the File tab.
2. In the list of files, next to each file, there is a Delete button.
3. The user clicks on the Delete button. A confirmation dialog appears.
4. If Delete, delete the file. If Cancel, close the confirmation dialog.

**Expectation**: once the file is deleted:

- The database entry of that file is deleted.
- The file is removed from "Chat - Data source".
- The total number of pages and MB sizes are reduced accordingly.
- The reference to the file in the information panel is still retained.


================================================
FILE: docs/pages/app/index/file.md
================================================
The file index stores files in a local folder and indexes them for retrieval.
This file index provides the following infrastructure to support the indexing:

- SQL table Source: store the list of files that are indexed by the system
- Vector store: contain the embedding of segments of the files
- Document store: contain the text of segments of the files. Each text stored
  in this document store is associated with a vector in the vector store.
- SQL table Index: store the relationship between (1) the source and the
  docstore, and (2) the source and the vector store.

The indexing and retrieval pipelines are encouraged to use the above software
infrastructure.

## Indexing pipeline

ktem has a default indexing pipeline: `ktem.index.file.pipelines.IndexDocumentPipeline`.

This default pipeline works as follows:

- **Input**: list of file paths
- **Output**: list of nodes that are indexed into database
- **Process**:
  - Read files into texts. Different file types have different ways to read texts.
  - Split text files into smaller segments.
  - Convert each segment into an embedding.
  - Store the embeddings into vector store. Store the texts of each segment
    into docstore. Store the list of files in Source. Store the linking
    between Sources and docstore + vectorstore in Index table.

You can customize this default pipeline if your indexing process is close to the
default pipeline. You can create your own indexing pipeline if your logic
differs too much from it.

### Customize the default pipeline

The default pipeline provides the contact points in `flowsettings.py`.

1. `FILE_INDEX_PIPELINE_FILE_EXTRACTORS`. Supply overriding file extractor,
   based on file extension. Example: `{".pdf": "path.to.PDFReader", ".xlsx": "path.to.ExcelReader"}`
2. `FILE_INDEX_PIPELINE_SPLITTER_CHUNK_SIZE`. The expected number of characters
   of each text segment. Example: 1024.
3. `FILE_INDEX_PIPELINE_SPLITTER_CHUNK_OVERLAP`. The expected number of
   characters that consecutive text segments should overlap with each other.
   Example: 256.

### Create your own indexing pipeline

Your indexing pipeline will subclass `BaseFileIndexIndexing`.

You should define the following methods:

- `run(self, file_paths)`: run the indexing given the pipeline
- `get_pipeline(cls, user_settings, index_settings)`: return the
  fully-initialized pipeline, ready to be used by ktem.
  - `user_settings`: is a dictionary that contains user settings (e.g. `{"pdf_mode": True, "num_retrieval": 5}`). You can declare these settings in the `get_user_settings` classmethod. ktem will collect these settings into the app Settings page, and will supply these user settings to your `get_pipeline` method.
  - `index_settings`: is a dictionary. Currently it's empty for File Index.
- `get_user_settings`: to declare user settings, return a dictionary.

By subclassing `BaseFileIndexIndexing`, you will have access to the following resources:

- `self._Source`: the source table
- `self._Index`: the index table
- `self._VS`: the vector store
- `self._DS`: the docstore

Once you have prepared your pipeline, register it in `flowsettings.py`: `FILE_INDEX_PIPELINE = "<python.path.to.your.pipeline>"`.

## Retrieval pipeline

ktem has a default retrieval pipeline:
`ktem.index.file.pipelines.DocumentRetrievalPipeline`. This pipeline works as
follows:

- Input: user text query & optionally a list of source file ids
- Output: the output segments that match the user text query
- Process:
  - If a list of source file ids is given, get the list of vector ids that
    associate with those file ids.
  - Embed the user text query.
  - Query the vector store. Provide a list of vector ids to limit the query
    scope if the user restricts the search to specific files.
  - Return the matched text segments

### Create your own retrieval pipeline

Your retrieval pipeline will subclass `BaseFileIndexRetriever`. The retriever
has the same database, vectorstore and docstore accesses like the indexing
pipeline.

You should define the following methods:

- `run(self, query, file_ids)`: retrieve relevant documents relating to the
  query. If `file_ids` is given, you should restrict your search within these
  `file_ids`.
- `get_pipeline(cls, user_settings, index_settings, selected)`: return the
  fully-initialized pipeline, ready to be used by ktem.
  - `user_settings`: is a dictionary that contains user settings (e.g. `{"pdf_mode": True, "num_retrieval": 5}`). You can declare these settings in the `get_user_settings` classmethod. ktem will collect these settings into the app Settings page, and will supply these user settings to your `get_pipeline` method.
  - `index_settings`: is a dictionary. Currently it's empty for File Index.
  - `selected`: a list of file ids selected by user. If user doesn't select
    anything, this variable will be None.
- `get_user_settings`: to declare user settings, return a dictionary.

Once you build the retrieval pipeline class, you can register it in
`flowsettings.py`: `FILE_INDEX_RETRIEVER_PIPELINES = ["path.to.retrieval.pipeline"]`. Because there can be
multiple parallel pipelines within an index, this variable takes a list of
strings rather than a string.

## Software infrastructure

| Infra            | Access        | Schema                                                                                                                                                                                                                                                                                      | Ref                                                        |
| ---------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
| SQL table Source | self.\_Source | - id (int): id of the source (auto)<br>- name (str): the name of the file<br>- path (str): the path of the file<br>- size (int): the file size in bytes<br>- note (dict): allow extra optional information about the file<br>- date_created (datetime): the time the file is created (auto) | This is SQLAlchemy ORM class. Can consult                  |
| SQL table Index  | self.\_Index  | - id (int): id of the index entry (auto)<br>- source_id (int): the id of a file in the Source table<br>- target_id: the id of the segment in docstore or vector store<br>- relation_type (str): if the link is "document" or "vector"                                                       | This is SQLAlchemy ORM class                               |
| Vector store     | self.\_VS     | - self.\_VS.add: add the list of embeddings to the vector store (optionally associate metadata and ids)<br>- self.\_VS.delete: delete vector entries based on ids<br>- self.\_VS.query: get embeddings based on embeddings.                                                                 | kotaemon > storages > vectorstores > BaseVectorStore       |
| Doc store        | self.\_DS     | - self.\_DS.add: add the segments to document stores<br>- self.\_DS.get: get the segments based on id<br>- self.\_DS.get_all: get all segments<br>- self.\_DS.delete: delete segments based on id                                                                                           | kotaemon > storages > docstores > base > BaseDocumentStore |


================================================
FILE: docs/pages/app/settings/overview.md
================================================
# Overview

There are 3 kinds of settings in `ktem`, geared towards different stakeholders
for different use cases:

- Developer settings. These settings are meant for very basic app customization, such as database URL, cloud config, logging config, which features to enable... You will be interested in the developer settings if you deploy `ktem` to your customers, or if you build extension for `ktem` for developers. These settings are declared inside `flowsettings.py`.
- Admin settings. These settings show up in the Admin page, and are meant to allow admin-level user to customize low level features, such as which credentials to connect to data sources, which keys to use for LLM...
- [User settings](/pages/app/settings/user-settings/). These settings are meant for run-time users to tweak ktem to their personal needs, such as which output languages the chatbot should generate, which reasoning type to use...


================================================
FILE: docs/pages/app/settings/user-settings.md
================================================
# User settings

`ktem` allows developers to extend the index and the reasoning pipeline. In
many cases, these components can have settings that should be modified by
users at run-time (e.g. `topk`, `chunksize`...). These are the user settings.

`ktem` allows developers to declare such user settings in their code. Once
declared, `ktem` will render them in a Settings page.

There are 2 places that `ktem` looks for declared user settings. You can
refer to the respective pages.

- In the index.
- In the reasoning pipeline.

## Syntax of a settings

A collection of settings is a dictionary of type `dict[str, dict]`, where the
key is a setting id, and the value is the description of the setting.

```python
settings = {
    "topk": {
        "name": "Top-k chunks",
        "value": 10,
        "component": "number",
    },
    "lang": {
        "name": "Languages",
        "value": "en",
        "component": "dropdown",
        "choices": [("en", "English"), ("cn", "Chinese")],
    }
}
```

Each setting description must have:

- name: the human-understandable name of the settings.
- value: the default value of the settings.
- component: the UI component to render such setting on the UI. Available:

  - "text": single-value
  - "number": single-value
  - "checkbox": single-value
  - "dropdown": choices
  - "radio": choices
  - "checkboxgroup": choices

- choices: the list of choices, if the component type allows.

## Settings page structure


================================================
FILE: docs/scripts/generate_examples_docs.py
================================================
# import shutil
from pathlib import Path
from typing import Any, Iterable

import mkdocs_gen_files

# Locate the documentation folder: walk upwards from this script until a
# directory named `doc_dir_name` is reached. The loop also stops once a path is
# its own parent (the top of the path), so it always terminates.
doc_dir_name = "docs"
doc_dir = Path(__file__)
while doc_dir.name != doc_dir_name and doc_dir != doc_dir.parent:
    doc_dir = doc_dir.parent

# Stopping at the top of the path means no ancestor is named `doc_dir_name`.
if doc_dir == doc_dir.parent:
    raise ValueError(f"root_name ({doc_dir_name}) not in path ({str(Path(__file__))}).")


def generate_docs_for_examples_readme(
    examples_dir: Path, target_doc_folder: str, ignored_modules: Iterable[Any] = ()
):
    """Create one generated docs page per example README, plus a nav file.

    Every ``*README.md`` found recursively under ``examples_dir`` becomes a
    page named after the folder that contains it. The page body is a single
    ``--8<--`` include line pointing at the README, and the README is
    registered as the page's edit path. A literate nav built from all the
    pages is written to ``<target_doc_folder>/NAV.md``.

    Args:
        examples_dir: directory searched recursively for README files.
        target_doc_folder: folder, inside the generated docs, that receives
            the pages and ``NAV.md``.
        ignored_modules: dotted-name prefixes; an example whose dotted
            identifier (folder parts joined with ``.``) starts with any of
            them is skipped.

    Raises:
        ModuleNotFoundError: if ``examples_dir`` is not an existing directory.
    """
    if not examples_dir.is_dir():
        raise ModuleNotFoundError(str(examples_dir))

    nav = mkdocs_gen_files.Nav()

    for path in sorted(examples_dir.rglob("*README.md")):
        # the glob also matches prefixed file names; drop "_..." and "test..."
        if path.name.startswith(("_", "test")):
            continue

        example_dir = path.parent.relative_to(examples_dir)
        if not example_dir.parts:
            # README sitting directly in `examples_dir`: there is no folder to
            # name the page after, and `with_suffix` raises on an empty name
            continue

        module_path = example_dir.with_suffix("")
        doc_path = example_dir.with_suffix(".md")
        full_doc_path = Path(target_doc_folder, doc_path)

        parts = list(module_path.parts)
        if "tests" in parts:
            continue

        identifier = ".".join(parts)
        if any(identifier.startswith(prefix) for prefix in ignored_modules):
            continue

        # "my_example" -> "My Example"
        nav_titles = [name.replace("_", " ").title() for name in parts]
        nav[nav_titles] = doc_path.as_posix()

        with mkdocs_gen_files.open(full_doc_path, "w") as f:
            f.write(f'--8<-- "{path.relative_to(examples_dir.parent)}"')

        mkdocs_gen_files.set_edit_path(
            full_doc_path, Path("..") / path.relative_to(examples_dir.parent)
        )

    with mkdocs_gen_files.open(f"{target_doc_folder}/NAV.md", "w") as nav_file:
        nav_file.writelines(nav.build_literate_nav())


# Executed whenever this script runs: build the pages from the `examples`
# folder that sits next to the docs directory, into the "examples" doc folder.
generate_docs_for_examples_readme(
    examples_dir=doc_dir.parent / "examples",
    target_doc_folder="examples",
)


================================================
FILE: docs/scripts/generate_reference_docs.py
================================================
# import shutil
from pathlib import Path
from typing import Any, Iterable

import mkdocs_gen_files

# Locate the documentation folder: walk upwards from this script until a
# directory named `doc_dir_name` is reached. The loop also stops once a path is
# its own parent (the top of the path), so it always terminates.
doc_dir_name = "docs"
doc_dir = Path(__file__)
while doc_dir.name != doc_dir_name and doc_dir != doc_dir.parent:
    doc_dir = doc_dir.parent

# Stopping at the top of the path means no ancestor is named `doc_dir_name`.
if doc_dir == doc_dir.parent:
    raise ValueError(f"root_name ({doc_dir_name}) not in path ({str(Path(__file__))}).")

# Navigation titles for path parts that must not be derived by title-casing
# (e.g. "llms" would otherwise become "Llms").
nav_title_map = {"cli": "CLI", "llms": "LLMs"}


def generate_docs_for_src_code(
    code_dir: Path, target_doc_folder: str, ignored_modules: Iterable[Any] = ()
):
    """Create one generated reference page per Python module, plus a nav file.

    Every ``*.py`` file found recursively under ``code_dir`` becomes a page
    whose body is a single ``::: <dotted.identifier>`` line, and the source
    file is registered as the page's edit path. A package ``__init__.py`` is
    written as the package's ``index.md``. A literate nav built from all the
    pages is written to ``<target_doc_folder>/Summary.md``.

    Args:
        code_dir: directory searched recursively for Python files; dotted
            identifiers are computed relative to it.
        target_doc_folder: folder, inside the generated docs, that receives
            the pages and ``Summary.md``.
        ignored_modules: dotted-name prefixes; a module whose identifier
            starts with any of them is skipped.

    Raises:
        ModuleNotFoundError: if ``code_dir`` is not an existing directory.
    """
    if not code_dir.is_dir():
        raise ModuleNotFoundError(str(code_dir))

    nav = mkdocs_gen_files.Nav()

    # note: files whose names start with "_" or "test" are not filtered here
    for path in sorted(code_dir.rglob("*.py")):
        relative_path = path.relative_to(code_dir)
        doc_path = relative_path.with_suffix(".md")
        full_doc_path = Path(target_doc_folder, doc_path)

        parts = list(relative_path.with_suffix("").parts)

        # a package's __init__.py is documented as the package's index page
        if parts[-1] == "__init__":
            doc_path = doc_path.with_name("index.md")
            full_doc_path = full_doc_path.with_name("index.md")
            parts.pop()

        # nothing left to name (top-level __init__.py), or test code
        if not parts or "tests" in parts:
            continue

        identifier = ".".join(parts)
        if any(identifier.startswith(prefix) for prefix in ignored_modules):
            continue

        # use the explicit title when one is mapped, otherwise "my_mod" -> "My Mod"
        nav_titles = [
            nav_title_map.get(name, name.replace("_", " ").title()) for name in parts
        ]
        nav[nav_titles] = doc_path.as_posix()

        with mkdocs_gen_files.open(full_doc_path, "w") as f:
            f.write(f"::: {identifier}")

        # this method works in docs folder
        mkdocs_gen_files.set_edit_path(
            full_doc_path, Path("..") / path.relative_to(code_dir.parent)
        )

    with mkdocs_gen_files.open(f"{target_doc_folder}/Summary.md", "w") as nav_file:
        nav_file.writelines(nav.build_literate_nav())


# Executed whenever this script runs: build reference pages for the package at
# libs/kotaemon/kotaemon (relative to the docs directory's parent), skipping
# modules whose dotted identifier starts with "contribs".
generate_docs_for_src_code(
    code_dir=doc_dir.parent / "libs" / "kotaemon" / "kotaemon",
    target_doc_folder="reference",
    ignored_modules={"contribs"},
)


================================================
FILE: docs/theme/assets/pymdownx-extras/extra-fb5a2a1c86.css
================================================
@charset "UTF-8";:root>*{--md-code-link-bg-color:hsla(0, 0%, 96%, 1);--md-code-link-accent-bg-color:var(--md-code-link-bg-color);--md-default-bg-color--trans:rgb(100%, 100%, 100%, 0);--md-code-title-bg-color:var(--md-code-bg-color);--md-code-inline-bg-color:var(--md-code-bg-color);--md-code-special-bg-color:#e8e8e8;--md-code-alternate-bg-color:var(--md-code-bg-color);--md-code-hl-punctuation-color:var(--md-code-fg-color);--md-code-hl-namespace-color:var(--md-code-fg-color);--md-code-hl-entity-color:var(--md-code-hl-keyword-color);--md-code-hl-tag-color:var(--md-code-hl-keyword-color);--md-code-hl-builtin-color:var(--md-code-hl-constant-color);--md-code-hl-class-color:var(--md-code-hl-function-color);--md-typeset-a-color:#00bcd4;--md-progress-stripe:var(--md-default-bg-color--lighter);--md-progress-100:#00e676;--md-progress-80:#00e676;--md-progress-60:#fbc02d;--md-progress-40:#ff9100;--md-progress-20:#ff5252;--md-progress-0:#ff1744;--md-typeset-kbd-color:#ebebeb;--md-typeset-kbd-border-color:#b8b8b8;--md-typeset-kbd-accent-color:hsla(0, 100%, 100%, 1)}:root>[data-md-color-scheme=slate]{--md-code-link-bg-color:hsla(232, 15%, 15%, 1);--md-code-link-accent-bg-color:var(--md-code-link-bg-color);--md-code-special-bg-color:#2b2d3b;--md-default-bg-color--trans:hsla(232,15%,15%, 0);--md-typeset-kbd-color:var(--md-default-fg-color--lightest);--md-typeset-kbd-border-color:#1a1c24;--md-typeset-kbd-accent-color:var(--md-default-fg-color--lighter)}:root>[data-md-color-scheme=dracula]{--md-default-fg-color:rgba(248, 248, 242, 0.87);--md-default-fg-color--light:rgba(248, 248, 242, 0.54);--md-default-fg-color--lighter:rgba(248, 248, 242, 0.16);--md-default-fg-color--lightest:rgba(248, 248, 242, 0.07);--md-default-autocomplete-fg-color:rgba(248, 248, 242, 0.4);--md-shadow-z2:0 0.2rem 0.5rem hsla(0, 0%, 0%, 0.3),0 0 0.05rem hsla(0, 0%, 0%, 0.2);--md-default-bg-color:var(--md-default-bg-color--darkest);--md-default-bg-color--light:rgba(50, 52, 67, 
0.7);--md-default-bg-color--lighter:rgba(50, 52, 67, 0.3);--md-default-bg-color--lightest:rgba(50, 52, 67, 0.12);--md-default-bg-color--trans:rgba(50, 52, 67, 0);--md-default-bg-color--dark:#2b2e3b;--md-default-bg-color--darker:#252732;--md-default-bg-color--darkest:#1e2029;--md-default-bg-color--ultra-dark:#111217;--md-text-color:var(--md-default-fg-color);--md-typeset-color:var(--md-default-fg-color);--md-admonition-fg-color:var(--md-default-fg-color);--md-code-fg-color:hsl(60, 30%, 96%);--md-code-bg-color:hsl(231, 15%, 18%);--md-code-title-bg-color:var(--md-default-bg-color--ultra-dark);--md-code-inline-bg-color:#323443;--md-code-hl-operator-color:hsl(326, 100%, 74%);--md-code-hl-punctuation-color:hsl(60, 30%, 96%);--md-code-hl-string-color:hsl(65, 92%, 76%);--md-code-hl-special-color:hsl(265, 89%, 78%);--md-code-hl-number-color:hsl(265, 89%, 78%);--md-code-hl-keyword-color:hsl(326, 100%, 74%);--md-code-hl-name-color:hsl(60, 30%, 96%);--md-code-hl-constant-color:hsl(265, 89%, 78%);--md-code-hl-function-color:hsl(135, 94%, 65%);--md-code-hl-comment-color:hsl(225, 27%, 51%);--md-code-hl-variable-color:hsl(31, 100%, 71%);--md-code-hl-generic-color:hsl(225, 27%, 51%);--md-code-hl-color:hsl(231, 25%, 25%);--md-code-hl-entity-color:hsl(135, 94%, 65%);--md-code-hl-tag-color:hsl(326, 100%, 74%);--md-code-hl-namespace-color:hsl(60, 30%, 96%);--md-code-hl-builtin-color:hsl(191, 97%, 77%);--md-code-hl-class-color:hsl(191, 97%, 77%);--md-code-special-bg-color:#1c1e26;--md-code-alternate-bg-color:#3d3e49;--md-code-link-bg-color:#364653;--md-typeset-a-color:hsl(191, 97%, 77%);--md-typeset-mark-color:#6e7252;--md-typeset-del-color:#734568;--md-typeset-ins-color:#36724e;--md-progress-stripe:var(--md-default-bg-color--lightest);--md-progress-100:hsl(135, 94%, 65%);--md-progress-80:hsl(135, 92%, 79%);--md-progress-60:hsl(65, 92%, 76%);--md-progress-40:hsl(31, 100%, 71%);--md-progress-20:hsl(326, 100%, 74%);--md-progress-0:hsl(0, 100%, 
67%);--md-typeset-kbd-color:var(--md-default-fg-color--lightest);--md-typeset-kbd-border-color:var(--md-default-bg-color--ultra-dark);--md-typeset-kbd-accent-color:var(--md-default-fg-color--lighter)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=red],[data-md-color-scheme=dracula][data-md-color-primary=red]{--md-primary-code-bg-color:#47303a;--md-primary-fg-color:hsla(0deg, 100%, 67%, 1);--md-primary-fg-color--transparent:hsla(0deg, 100%, 67%, 0.1);--md-primary-fg-color--light:hsla(0deg, 100%, 72%, 1);--md-primary-fg-color--dark:hsla(0deg, 100%, 62%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=pink],[data-md-color-scheme=dracula][data-md-color-primary=pink]{--md-primary-code-bg-color:#47354b;--md-primary-fg-color:hsla(326deg, 100%, 74%, 1);--md-primary-fg-color--transparent:hsla(326deg, 100%, 74%, 0.1);--md-primary-fg-color--light:hsla(326deg, 100%, 79%, 1);--md-primary-fg-color--dark:hsla(326deg, 100%, 69%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=purple],[data-md-color-scheme=dracula][data-md-color-primary=purple]{--md-primary-code-bg-color:#3e3952;--md-primary-fg-color:hsla(265deg, 89%, 78%, 1);--md-primary-fg-color--transparent:hsla(265deg, 89%, 78%, 0.1);--md-primary-fg-color--light:hsla(265deg, 89%, 83%, 1);--md-primary-fg-color--dark:hsla(265deg, 89%, 73%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=deep-purple],[data-md-color-scheme=dracula][data-md-color-primary=deep-purple]{--md-primary-code-bg-color:#3e3952;--md-primary-fg-color:hsla(265deg, 89%, 78%, 
1);--md-primary-fg-color--transparent:hsla(265deg, 89%, 78%, 0.1);--md-primary-fg-color--light:hsla(265deg, 89%, 83%, 1);--md-primary-fg-color--dark:hsla(265deg, 89%, 73%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=blue],[data-md-color-scheme=dracula][data-md-color-primary=blue]{--md-primary-code-bg-color:#303446;--md-primary-fg-color:hsla(225deg, 27%, 51%, 1);--md-primary-fg-color--transparent:hsla(225deg, 27%, 51%, 0.1);--md-primary-fg-color--light:hsla(225deg, 27%, 56%, 1);--md-primary-fg-color--dark:hsla(225deg, 27%, 46%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=indigo],[data-md-color-scheme=dracula][data-md-color-primary=indigo]{--md-primary-code-bg-color:#303446;--md-primary-fg-color:hsla(225deg, 27%, 51%, 1);--md-primary-fg-color--transparent:hsla(225deg, 27%, 51%, 0.1);--md-primary-fg-color--light:hsla(225deg, 27%, 56%, 1);--md-primary-fg-color--dark:hsla(225deg, 27%, 46%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-blue],[data-md-color-scheme=dracula][data-md-color-primary=light-blue]{--md-primary-code-bg-color:#303446;--md-primary-fg-color:hsla(225deg, 27%, 51%, 1);--md-primary-fg-color--transparent:hsla(225deg, 27%, 51%, 0.1);--md-primary-fg-color--light:hsla(225deg, 27%, 56%, 1);--md-primary-fg-color--dark:hsla(225deg, 27%, 46%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] 
:not([data-md-color-scheme])[data-md-color-primary=cyan],[data-md-color-scheme=dracula][data-md-color-primary=cyan]{--md-primary-code-bg-color:#364653;--md-primary-fg-color:hsla(191deg, 97%, 77%, 1);--md-primary-fg-color--transparent:hsla(191deg, 97%, 77%, 0.1);--md-primary-fg-color--light:hsla(191deg, 97%, 82%, 1);--md-primary-fg-color--dark:hsla(191deg, 97%, 72%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=teal],[data-md-color-scheme=dracula][data-md-color-primary=teal]{--md-primary-code-bg-color:#364653;--md-primary-fg-color:hsla(191deg, 97%, 77%, 1);--md-primary-fg-color--transparent:hsla(191deg, 97%, 77%, 0.1);--md-primary-fg-color--light:hsla(191deg, 97%, 82%, 1);--md-primary-fg-color--dark:hsla(191deg, 97%, 72%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=green],[data-md-color-scheme=dracula][data-md-color-primary=green]{--md-primary-code-bg-color:#2d4840;--md-primary-fg-color:hsla(135deg, 94%, 65%, 1);--md-primary-fg-color--transparent:hsla(135deg, 94%, 65%, 0.1);--md-primary-fg-color--light:hsla(135deg, 94%, 70%, 1);--md-primary-fg-color--dark:hsla(135deg, 94%, 60%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-green],[data-md-color-scheme=dracula][data-md-color-primary=light-green]{--md-primary-code-bg-color:#2d4840;--md-primary-fg-color:hsla(135deg, 94%, 65%, 1);--md-primary-fg-color--transparent:hsla(135deg, 94%, 65%, 0.1);--md-primary-fg-color--light:hsla(135deg, 94%, 70%, 1);--md-primary-fg-color--dark:hsla(135deg, 94%, 60%, 
1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=lime],[data-md-color-scheme=dracula][data-md-color-primary=lime]{--md-primary-code-bg-color:#2d4840;--md-primary-fg-color:hsla(135deg, 94%, 65%, 1);--md-primary-fg-color--transparent:hsla(135deg, 94%, 65%, 0.1);--md-primary-fg-color--light:hsla(135deg, 94%, 70%, 1);--md-primary-fg-color--dark:hsla(135deg, 94%, 60%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=yellow],[data-md-color-scheme=dracula][data-md-color-primary=yellow]{--md-primary-code-bg-color:#454842;--md-primary-fg-color:hsla(65deg, 92%, 76%, 1);--md-primary-fg-color--transparent:hsla(65deg, 92%, 76%, 0.1);--md-primary-fg-color--light:hsla(65deg, 92%, 81%, 1);--md-primary-fg-color--dark:hsla(65deg, 92%, 71%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=amber],[data-md-color-scheme=dracula][data-md-color-primary=amber]{--md-primary-code-bg-color:#454842;--md-primary-fg-color:hsla(65deg, 92%, 76%, 1);--md-primary-fg-color--transparent:hsla(65deg, 92%, 76%, 0.1);--md-primary-fg-color--light:hsla(65deg, 92%, 81%, 1);--md-primary-fg-color--dark:hsla(65deg, 92%, 71%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=orange],[data-md-color-scheme=dracula][data-md-color-primary=orange]{--md-primary-code-bg-color:#473e3d;--md-primary-fg-color:hsla(31deg, 100%, 71%, 1);--md-primary-fg-color--transparent:hsla(31deg, 100%, 71%, 0.1);--md-primary-fg-color--light:hsla(31deg, 100%, 
76%, 1);--md-primary-fg-color--dark:hsla(31deg, 100%, 66%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=deep-orange],[data-md-color-scheme=dracula][data-md-color-primary=deep-orange]{--md-primary-code-bg-color:#473e3d;--md-primary-fg-color:hsla(31deg, 100%, 71%, 1);--md-primary-fg-color--transparent:hsla(31deg, 100%, 71%, 0.1);--md-primary-fg-color--light:hsla(31deg, 100%, 76%, 1);--md-primary-fg-color--dark:hsla(31deg, 100%, 66%, 1);--md-primary-bg-color:var(--md-default-bg-color);--md-primary-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=red],[data-md-color-scheme=dracula][data-md-color-accent=red]{--md-code-link-accent-bg-color:#472c36;--md-accent-fg-color:hsla(0deg, 100%, 62%, 1);--md-accent-fg-color--transparent:hsla(0deg, 100%, 62%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=pink],[data-md-color-scheme=dracula][data-md-color-accent=pink]{--md-code-link-accent-bg-color:#473149;--md-accent-fg-color:hsla(326deg, 100%, 69%, 1);--md-accent-fg-color--transparent:hsla(326deg, 100%, 69%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=purple],[data-md-color-scheme=dracula][data-md-color-accent=purple]{--md-code-link-accent-bg-color:#3c3652;--md-accent-fg-color:hsla(265deg, 89%, 73%, 1);--md-accent-fg-color--transparent:hsla(265deg, 89%, 73%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] 
:not([data-md-color-scheme])[data-md-color-primary=deep-purple],[data-md-color-scheme=dracula][data-md-color-accent=deep-purple]{--md-code-link-accent-bg-color:#3c3652;--md-accent-fg-color:hsla(265deg, 89%, 73%, 1);--md-accent-fg-color--transparent:hsla(265deg, 89%, 73%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=blue],[data-md-color-scheme=dracula][data-md-color-accent=blue]{--md-code-link-accent-bg-color:#2e3243;--md-accent-fg-color:hsla(225deg, 27%, 46%, 1);--md-accent-fg-color--transparent:hsla(225deg, 27%, 46%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=indigo],[data-md-color-scheme=dracula][data-md-color-accent=indigo]{--md-code-link-accent-bg-color:#2e3243;--md-accent-fg-color:hsla(225deg, 27%, 46%, 1);--md-accent-fg-color--transparent:hsla(225deg, 27%, 46%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-blue],[data-md-color-scheme=dracula][data-md-color-accent=light-blue]{--md-code-link-accent-bg-color:#2e3243;--md-accent-fg-color:hsla(225deg, 27%, 46%, 1);--md-accent-fg-color--transparent:hsla(225deg, 27%, 46%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=cyan],[data-md-color-scheme=dracula][data-md-color-accent=cyan]{--md-code-link-accent-bg-color:#324553;--md-accent-fg-color:hsla(191deg, 97%, 72%, 1);--md-accent-fg-color--transparent:hsla(191deg, 97%, 72%, 
0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=teal],[data-md-color-scheme=dracula][data-md-color-accent=teal]{--md-code-link-accent-bg-color:#324553;--md-accent-fg-color:hsla(191deg, 97%, 72%, 1);--md-accent-fg-color--transparent:hsla(191deg, 97%, 72%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=green],[data-md-color-scheme=dracula][data-md-color-accent=green]{--md-code-link-accent-bg-color:#2a483d;--md-accent-fg-color:hsla(135deg, 94%, 60%, 1);--md-accent-fg-color--transparent:hsla(135deg, 94%, 60%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=light-green],[data-md-color-scheme=dracula][data-md-color-accent=light-green]{--md-code-link-accent-bg-color:#2a483d;--md-accent-fg-color:hsla(135deg, 94%, 60%, 1);--md-accent-fg-color--transparent:hsla(135deg, 94%, 60%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=lime],[data-md-color-scheme=dracula][data-md-color-accent=lime]{--md-code-link-accent-bg-color:#2a483d;--md-accent-fg-color:hsla(135deg, 94%, 60%, 1);--md-accent-fg-color--transparent:hsla(135deg, 94%, 60%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=yellow],[data-md-color-scheme=dracula][data-md-color-accent=yellow]{--md-code-link-accent-bg-color:#45483e;--md-accent-fg-color:hsla(65deg, 92%, 71%, 
1);--md-accent-fg-color--transparent:hsla(65deg, 92%, 71%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=amber],[data-md-color-scheme=dracula][data-md-color-accent=amber]{--md-code-link-accent-bg-color:#45483e;--md-accent-fg-color:hsla(65deg, 92%, 71%, 1);--md-accent-fg-color--transparent:hsla(65deg, 92%, 71%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=orange],[data-md-color-scheme=dracula][data-md-color-accent=orange]{--md-code-link-accent-bg-color:#473d39;--md-accent-fg-color:hsla(31deg, 100%, 66%, 1);--md-accent-fg-color--transparent:hsla(31deg, 100%, 66%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] :not([data-md-color-scheme])[data-md-color-primary=deep-orange],[data-md-color-scheme=dracula][data-md-color-accent=deep-orange]{--md-code-link-accent-bg-color:#473d39;--md-accent-fg-color:hsla(31deg, 100%, 66%, 1);--md-accent-fg-color--transparent:hsla(31deg, 100%, 66%, 0.1);--md-accent-bg-color:var(--md-default-bg-color);--md-accent-bg-color--light:var(--md-default-bg-color--light)}:root{--md-heart:#ff5252;--md-heart-big:#ff1744}:root :focus-visible{outline-style:solid}:root [data-md-color-scheme=dracula]{--md-heart:hsl(326, 100%, 74%);--md-heart-big:hsl(0, 100%, 67%)}.md-typeset h4{margin:2em 0 1em}.md-typeset a.source-link{position:relative;top:-.6rem;float:right;color:var(--md-default-fg-color--lighter);transition:color 125ms}.md-typeset a.source-link:hover{color:var(--md-accent-fg-color)}.md-typeset a.source-link .twemoji{height:1.2rem}.md-typeset a.source-link .twemoji svg{width:1.2rem;height:1.2rem}.md-typeset div.highlight.md-max-height 
pre>code{max-height:15rem}.twemoji.heart-throb svg,.twemoji.heart-throb-hover svg{position:relative;color:var(--md-heart);animation:pulse 1.5s ease infinite}@keyframes pulse{0%{transform:scale(1)}40%{color:var(--md-heart-big);transform:scale(1.3)}50%{transform:scale(1.2)}60%{color:var(--md-heart-big);transform:scale(1.3)}100%{transform:scale(1)}}footer.sponsorship{text-align:center}footer.sponsorship hr{display:inline-block;width:1.6rem;margin:0 .7rem;vertical-align:middle;border-bottom:2px solid var(--md-default-fg-color--lighter)}footer.sponsorship:hover hr{border-color:var(--md-accent-fg-color)}footer.sponsorship:not(:hover) .twemoji.heart-throb-hover svg{color:var(--md-default-fg-color--lighter)!important}body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=dracula] .md-icon .light-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=dracula] .md-icon .system-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=dracula] .md-icon .unknown-mode{display:none}body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=default] .md-icon .dark-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=default] .md-icon .system-mode,body:not([data-md-prefers-color-scheme=true])[data-md-color-scheme=default] .md-icon .unknown-mode{display:none}body:not([data-md-prefers-color-scheme=true]):not([data-md-color-scheme=default]):not([data-md-color-scheme=dracula]) .md-icon .dark-mode,body:not([data-md-prefers-color-scheme=true]):not([data-md-color-scheme=default]):not([data-md-color-scheme=dracula]) .md-icon .light-mode,body:not([data-md-prefers-color-scheme=true]):not([data-md-color-scheme=default]):not([data-md-color-scheme=dracula]) .md-icon .system-mode{display:none}body[data-md-prefers-color-scheme=true] .md-icon .dark-mode,body[data-md-prefers-color-scheme=true] .md-icon .light-mode,body[data-md-prefers-color-scheme=true] .md-icon 
.unknown-mode{display:none}.md-header-nav__scheme{z-index:0}[data-md-toggle=search]:checked~.md-header .md-header-nav__scheme{display:none}.md-typeset .admonition,.md-typeset details{border-width:0;border-left-width:4px}:root>*{--md-admonition-bg-color:transparent;--md-admonition-icon--settings:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 15.5A3.5 3.5 0 0 1 8.5 12 3.5 3.5 0 0 1 12 8.5a3.5 3.5 0 0 1 3.5 3.5 3.5 3.5 0 0 1-3.5 3.5m7.43-2.53c.04-.32.07-.64.07-.97 0-.33-.03-.66-.07-1l2.11-1.63c.19-.15.24-.42.12-.64l-2-3.46c-.12-.22-.39-.31-.61-.22l-2.49 1c-.52-.39-1.06-.73-1.69-.98l-.37-2.65A.506.506 0 0 0 14 2h-4c-.25 0-.46.18-.5.42l-.37 2.65c-.63.25-1.17.59-1.69.98l-2.49-1c-.22-.09-.49 0-.61.22l-2 3.46c-.13.22-.07.49.12.64L4.57 11c-.04.34-.07.67-.07 1 0 .33.03.65.07.97l-2.11 1.66c-.19.15-.25.42-.12.64l2 3.46c.12.22.39.3.61.22l2.49-1.01c.52.4 1.06.74 1.69.99l.37 2.65c.04.24.25.42.5.42h4c.25 0 .46-.18.5-.42l.37-2.65c.63-.26 1.17-.59 1.69-.99l2.49 1.01c.22.08.49 0 .61-.22l2-3.46c.12-.22.07-.49-.12-.64l-2.11-1.66Z"/></svg>');--md-admonition-bg-color--settings:rgba(170, 0, 255, 0.1);--md-admonition-icon-color--settings:#aa00ff;--md-admonition-shadow-color--settings:rgba(170, 0, 255, 0.1);--md-admonition-icon--new:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m23 12-2.44-2.78.34-3.68-3.61-.82-1.89-3.18L12 3 8.6 1.54 6.71 4.72l-3.61.81.34 3.68L1 12l2.44 2.78-.34 3.69 3.61.82 1.89 3.18L12 21l3.4 1.46 1.89-3.18 3.61-.82-.34-3.68L23 12m-10 5h-2v-2h2v2m0-4h-2V7h2v6Z"/></svg>');--md-admonition-bg-color--new:rgba(255, 214, 0, 0.1);--md-admonition-icon-color--new:#ffd600;--md-admonition-shadow-color--new:rgba(255, 214, 0, 0.1);--md-admonition-bg-color--note:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--note:hsl(51, 94%, 73%);--md-admonition-shadow-color--note:rgba(251, 231, 121, 
0.1);--md-admonition-bg-color--abstract:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--abstract:hsl(191, 97%, 77%);--md-admonition-shadow-color--abstract:rgba(139, 232, 253, 0.1);--md-admonition-bg-color--info:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--info:hsl(190, 94%, 87%);--md-admonition-shadow-color--info:rgba(191, 243, 253, 0.1);--md-admonition-bg-color--tip:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--tip:hsl(161, 97%, 77%);--md-admonition-shadow-color--tip:rgba(139, 253, 217, 0.1);--md-admonition-bg-color--success:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--success:hsl(135, 94%, 65%);--md-admonition-shadow-color--success:rgba(82, 250, 124, 0.1);--md-admonition-bg-color--question:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--question:hsl(135, 92%, 79%);--md-admonition-shadow-color--question:rgba(152, 251, 177, 0.1);--md-admonition-bg-color--warning:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--warning:hsl(31, 100%, 71%);--md-admonition-shadow-color--warning:rgba(255, 184, 107, 0.1);--md-admonition-bg-color--failure:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--failure:hsl(0, 100%, 59%);--md-admonition-shadow-color--failure:rgba(255, 46, 46, 0.1);--md-admonition-bg-color--danger:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--danger:hsl(0, 100%, 67%);--md-admonition-shadow-color--danger:rgba(255, 87, 87, 0.1);--md-admonition-bg-color--bug:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--bug:hsl(325, 100%, 64%);--md-admonition-shadow-color--bug:rgba(255, 71, 179, 0.1);--md-admonition-bg-color--example:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--example:hsl(265, 89%, 78%);--md-admonition-shadow-color--example:rgba(191, 149, 249, 0.1);--md-admonition-bg-color--quote:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--quote:hsl(225, 8%, 
51%);--md-admonition-shadow-color--quote:rgba(120, 125, 140, 0.1)}:root>[data-md-color-scheme=dracula]{--md-admonition-icon-color:$drac-dark-yellow}:root>[data-md-color-scheme=dracula]{--md-admonition-bg-color--settings:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--settings:hsl(326, 100%, 74%);--md-admonition-shadow-color--settings:rgba(255, 122, 198, 0.1)}:root>[data-md-color-scheme=dracula]{--md-admonition-bg-color--new:var(--md-default-bg-color--ultra-dark);--md-admonition-icon-color--new:hsl(65, 92%, 76%);--md-admonition-shadow-color--new:rgba(241, 250, 137, 0.1)}[data-md-color-scheme=dracula] .md-typeset .admonition,[data-md-color-scheme=dracula] .md-typeset details{border-color:var(--md-admonition-icon-color--note);box-shadow:var(--md-shadow-z2)}[data-md-color-scheme=dracula] .md-typeset .admonition:focus-within,[data-md-color-scheme=dracula] .md-typeset details:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details>summary{background-color:var(--md-admonition-bg-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details>summary::before{background-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details>summary::after{color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note,[data-md-color-scheme=dracula] .md-typeset details.note{border-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.note:focus-within,[data-md-color-scheme=dracula] .md-typeset details.note:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.note>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.note>summary{background-color:var(--md-admonition-bg-color--note);border-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.note>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.note>summary::before{background-color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.note>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.note>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.note>summary::after{color:var(--md-admonition-icon-color--note)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract,[data-md-color-scheme=dracula] .md-typeset details.abstract{border-color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract:focus-within,[data-md-color-scheme=dracula] .md-typeset details.abstract:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.abstract>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.abstract>summary{background-color:var(--md-admonition-bg-color--abstract);border-color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.abstract>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.abstract>summary::before{background-color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.abstract>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.abstract>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.abstract>summary::after{color:var(--md-admonition-icon-color--abstract)}[data-md-color-scheme=dracula] .md-typeset .admonition.info,[data-md-color-scheme=dracula] .md-typeset details.info{border-color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info:focus-within,[data-md-color-scheme=dracula] .md-typeset details.info:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.info>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.info>summary{background-color:var(--md-admonition-bg-color--info);border-color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.info>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.info>summary::before{background-color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.info>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.info>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.info>summary::after{color:var(--md-admonition-icon-color--info)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip,[data-md-color-scheme=dracula] .md-typeset details.tip{border-color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.tip:focus-within,[data-md-color-scheme=dracula] .md-typeset details.tip:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.tip>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.tip>summary{background-color:var(--md-admonition-bg-color--tip);border-color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.tip>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.tip>summary::before{background-color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.tip>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.tip>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.tip>summary::after{color:var(--md-admonition-icon-color--tip)}[data-md-color-scheme=dracula] .md-typeset .admonition.success,[data-md-color-scheme=dracula] .md-typeset details.success{border-color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success:focus-within,[data-md-color-scheme=dracula] .md-typeset details.success:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.success>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.success>summary{background-color:var(--md-admonition-bg-color--success);border-color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.success>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.success>summary::before{background-color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.success>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.success>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.success>summary::after{color:var(--md-admonition-icon-color--success)}[data-md-color-scheme=dracula] .md-typeset .admonition.question,[data-md-color-scheme=dracula] .md-typeset details.question{border-color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question:focus-within,[data-md-color-scheme=dracula] .md-typeset details.question:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.question>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.question>summary{background-color:var(--md-admonition-bg-color--question);border-color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.question>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.question>summary::before{background-color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.question>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.question>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.question>summary::after{color:var(--md-admonition-icon-color--question)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning,[data-md-color-scheme=dracula] .md-typeset details.warning{border-color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.warning:focus-within,[data-md-color-scheme=dracula] .md-typeset details.warning:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.warning>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.warning>summary{background-color:var(--md-admonition-bg-color--warning);border-color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.warning>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.warning>summary::before{background-color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.warning>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.warning>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.warning>summary::after{color:var(--md-admonition-icon-color--warning)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure,[data-md-color-scheme=dracula] .md-typeset details.failure{border-color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure:focus-within,[data-md-color-scheme=dracula] .md-typeset details.failure:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.failure>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.failure>summary{background-color:var(--md-admonition-bg-color--failure);border-color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.failure>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.failure>summary::before{background-color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.failure>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.failure>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.failure>summary::after{color:var(--md-admonition-icon-color--failure)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger,[data-md-color-scheme=dracula] .md-typeset details.danger{border-color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger:focus-within,[data-md-color-scheme=dracula] .md-typeset details.danger:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.danger>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.danger>summary{background-color:var(--md-admonition-bg-color--danger);border-color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.danger>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.danger>summary::before{background-color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.danger>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.danger>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.danger>summary::after{color:var(--md-admonition-icon-color--danger)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug,[data-md-color-scheme=dracula] .md-typeset details.bug{border-color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset 
.admonition.bug:focus-within,[data-md-color-scheme=dracula] .md-typeset details.bug:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.bug>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.bug>summary{background-color:var(--md-admonition-bg-color--bug);border-color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.bug>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.bug>summary::before{background-color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.bug>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.bug>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.bug>summary::after{color:var(--md-admonition-icon-color--bug)}[data-md-color-scheme=dracula] .md-typeset .admonition.example,[data-md-color-scheme=dracula] .md-typeset details.example{border-color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example:focus-within,[data-md-color-scheme=dracula] .md-typeset details.example:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.example>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.example>summary{background-color:var(--md-admonition-bg-color--example);border-color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.example>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset 
details.example>summary::before{background-color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.example>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.example>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.example>summary::after{color:var(--md-admonition-icon-color--example)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote,[data-md-color-scheme=dracula] .md-typeset details.quote{border-color:var(--md-admonition-icon-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote:focus-within,[data-md-color-scheme=dracula] .md-typeset details.quote:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.quote>.admonition-title,[data-md-color-scheme=dracula] .md-typeset details.quote>summary{background-color:var(--md-admonition-bg-color--quote);border-color:var(--md-admonition-icon-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.quote>.admonition-title::before,[data-md-color-scheme=dracula] .md-typeset details.quote>summary::before{background-color:var(--md-admonition-icon-color--quote)}[data-md-color-scheme=dracula] .md-typeset .admonition.quote>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.quote>.admonition-title::after,[data-md-color-scheme=dracula] .md-typeset details.quote>summary::after{color:var(--md-admonition-icon-color--quote)}.md-typeset .admonition.config,.md-typeset .admonition.settings,.md-typeset details.config,.md-typeset details.settings{border-color:var(--md-admonition-icon-color--settings)}.md-typeset .admonition.config:focus-within,.md-typeset .admonition.settings:focus-within,.md-typeset details.config:focus-within,.md-typeset 
details.settings:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--settings)}.md-typeset .admonition.config>.admonition-title,.md-typeset .admonition.settings>.admonition-title,.md-typeset details.config>.admonition-title,.md-typeset details.config>summary,.md-typeset details.settings>.admonition-title,.md-typeset details.settings>summary{background-color:var(--md-admonition-bg-color--settings);border-color:var(--md-admonition-icon-color--settings)}.md-typeset .admonition.config>.admonition-title::before,.md-typeset .admonition.settings>.admonition-title::before,.md-typeset details.config>.admonition-title::before,.md-typeset details.config>summary::before,.md-typeset details.settings>.admonition-title::before,.md-typeset details.settings>summary::before{width:1rem;height:1rem;background-color:var(--md-admonition-icon-color--settings);background-size:1rem;-webkit-mask-image:var(--md-admonition-icon--settings);mask-image:var(--md-admonition-icon--settings);content:" "}.md-typeset .admonition.config>.admonition-title::after,.md-typeset .admonition.settings>.admonition-title::after,.md-typeset details.config>.admonition-title::after,.md-typeset details.config>summary::after,.md-typeset details.settings>.admonition-title::after,.md-typeset details.settings>summary::after{color:var(--md-admonition-icon-color--settings)}.md-typeset .admonition.new,.md-typeset details.new{border-color:var(--md-admonition-icon-color--new)}.md-typeset .admonition.new:focus-within,.md-typeset details.new:focus-within{box-shadow:0 0 0 .2rem var(--md-admonition-shadow-color--new)}.md-typeset .admonition.new>.admonition-title,.md-typeset details.new>.admonition-title,.md-typeset details.new>summary{background-color:var(--md-admonition-bg-color--new);border-color:var(--md-admonition-icon-color--new)}.md-typeset .admonition.new>.admonition-title::before,.md-typeset details.new>.admonition-title::before,.md-typeset 
details.new>summary::before{width:1rem;height:1rem;background-color:var(--md-admonition-icon-color--new);background-size:1rem;-webkit-mask-image:var(--md-admonition-icon--new);mask-image:var(--md-admonition-icon--new);content:" "}.md-typeset .admonition.new>.admonition-title::after,.md-typeset details.new>.admonition-title::after,.md-typeset details.new>summary::after{color:var(--md-admonition-icon-color--new)}mjx-container[display=true]{font-size:120%!important}mjx-container:not([display]){font-size:100%!important}[data-md-color-scheme=dracula] .CtxtMenu_InfoContent pre,[data-md-color-scheme=dracula] .CtxtMenu_InfoSignature input,[data-md-color-scheme=slate] .CtxtMenu_InfoContent pre,[data-md-color-scheme=slate] .CtxtMenu_InfoSignature input{color:#000}[data-md-color-scheme=dracula] .CtxtMenu_Info,[data-md-color-scheme=dracula] .CtxtMenu_Menu,[data-md-color-scheme=slate] .CtxtMenu_Info,[data-md-color-scheme=slate] .CtxtMenu_Menu{box-shadow:0 10px 20px rgba(0,0,0,.5)}.md-typeset .arithmatex{overflow-x:auto!important;overflow-y:hidden!important}.katex-display .katex-html{display:flex!important;flex-direction:row;flex-wrap:nowrap;align-items:baseline;justify-content:space-between}.katex-display .katex-html .base{display:inline!important}.katex-display .katex-html .tag{position:relative!important;display:inline!important;margin-left:var(--margin-small)}.md-typeset del.critic,.md-typeset ins.critic,.md-typeset mark.critic{padding:0 .25em;color:unset;box-shadow:none}.md-typeset .critic.break{margin:0}.md-typeset details{overflow:hidden}.md-typeset details>summary:focus{outline-style:none}.highlight .kc{color:var(--md-code-hl-constant-color)}.highlight .nc,.highlight .ne{color:var(--md-code-hl-class-color)}.highlight .mb{color:var(--md-code-hl-number-color)}.highlight .bp,.highlight .nb{color:var(--md-code-hl-builtin-color)}.highlight .nn{color:var(--md-code-hl-namespace-color)}.highlight .na,.highlight .nd,.highlight .ni{color:var(--md-code-hl-entity-color)}.highlight 
.nl,.highlight .nt{color:var(--md-code-hl-tag-color)}.md-typeset :not(pre)>code{margin:0;padding:0 .2941176471em;color:var(--md-code-fg-color);background-color:var(--md-code-inline-bg-color);border-radius:.1rem;box-shadow:none}.md-typeset a>code{color:inherit!important;background-color:var(--md-code-link-bg-color)!important;transition:color 125ms;transition:background-color 125ms}.md-typeset a>code *{color:var(--md-typeset-a-color)!important}.md-typeset a>code:hover{background-color:var(--md-code-link-accent-bg-color)!important}.md-typeset a>code:hover *{color:var(--md-accent-fg-color)!important}.md-typeset pre>code{outline:0}.md-typeset td code{word-break:normal}.md-typeset .highlight{-moz-tab-size:8;-o-tab-size:8;tab-size:8}.md-typeset .highlight+.result{border-width:.1rem}.md-typeset .highlight [data-linenos].special::before{background-color:var(--md-code-special-bg-color)}.md-typeset .highlighttable .linenodiv .special{margin-right:-.5882352941em;margin-left:-1.1764705882em;padding-right:.5882352941em;padding-left:1.1764705882em;background-color:var(--md-code-special-bg-color)}.md-typeset .highlight span.filename{position:relative;display:block;margin-top:1em;padding:.5em 1.1764705882em .5em 2.9411764706em;font-weight:700;font-size:.68rem;background-color:var(--md-code-title-bg-color);border-top-left-radius:.1rem;border-top-right-radius:.1rem}.md-typeset .highlight span.filename+pre{margin-top:0}.md-typeset .highlight span.filename+pre code{border-top-left-radius:0;border-top-right-radius:0}.md-typeset .highlight span.filename::before{position:absolute;left:.8823529412em;width:1.4705882353em;height:1.4705882353em;background-color:var(--md-default-fg-color);-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 19V7H4v12h16m0-16a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h16m-7 14v-2h5v2h-5m-3.42-4L5.57 9H8.4l3.3 3.3c.39.39.39 1.03 0 1.42L8.42 
17H5.59l3.99-4Z"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 19V7H4v12h16m0-16a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h16m-7 14v-2h5v2h-5m-3.42-4L5.57 9H8.4l3.3 3.3c.39.39.39 1.03 0 1.42L8.42 17H5.59l3.99-4Z"/></svg>');-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;content:""}.md-typeset .collapse-code{position:relative;margin-top:1em;margin-bottom:1em}.md-typeset .collapse-code pre{margin-top:0;margin-bottom:0}.md-typeset .collapse-code input{display:none}.md-typeset .collapse-code input~.code-footer{width:100%;margin:0;padding:.25em .5em .25em 0}.md-typeset .collapse-code input~.code-footer label{position:relative;margin:.05em;padding:.15em .8em;color:var(--md-primary-bg-color);font-size:90%;background-color:var(--md-primary-fg-color);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;border-radius:.1rem;cursor:pointer;content:""}.md-typeset .collapse-code input~.code-footer label:hover{background-color:var(--md-accent-fg-color)}.md-typeset .collapse-code input~.code-footer label::before{position:absolute;top:.15em;left:.15em;display:block;box-sizing:border-box;width:1.25em;height:1.25em;background-color:var(--md-primary-bg-color);background-size:1.25em;content:""}.md-typeset .collapse-code input~.code-footer label.expand{display:none}.md-typeset .collapse-code input~.code-footer label.expand::before{-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 21v-2H6.41l4.5-4.5-1.41-1.41-4.5 4.5V14H3v7h7m4.5-10.09 4.5-4.5V10h2V3h-7v2h3.59l-4.5 4.5 1.41 1.41Z"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 21v-2H6.41l4.5-4.5-1.41-1.41-4.5 4.5V14H3v7h7m4.5-10.09 4.5-4.5V10h2V3h-7v2h3.59l-4.5 4.5 1.41 
1.41Z"/></svg>')}.md-typeset .collapse-code input~.code-footer label.collapse::before{-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19.5 3.09 15 7.59V4h-2v7h7V9h-3.59l4.5-4.5-1.41-1.41M4 13v2h3.59l-4.5 4.5 1.41 1.41 4.5-4.5V20h2v-7H4Z"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19.5 3.09 15 7.59V4h-2v7h7V9h-3.59l4.5-4.5-1.41-1.41M4 13v2h3.59l-4.5 4.5 1.41 1.41 4.5-4.5V20h2v-7H4Z"/></svg>')}.md-typeset .collapse-code input:checked~.code-footer label.expand{display:inline}.md-typeset .collapse-code input:checked~.code-footer label.collapse{display:none}.md-typeset .collapse-code input:checked+div.highlight code{max-height:9.375em;overflow:hidden}.md-typeset .collapse-code input:checked~.code-footer{position:absolute;bottom:0;left:0;padding:2em .5em .5em .8rem;background-image:linear-gradient(to bottom,transparent,var(--md-default-bg-color) 80% 100%)}.md-typeset .keys .key-power::before{padding-right:.4em;content:"⏻"}.md-typeset .keys .key-fingerprint::before{padding-right:.4em;content:"☝"}:root>*{--magiclink-email-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 4H4c-1.11 0-2 .89-2 2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2m-3 13H7v-2h10m0-2H7v-2h10m3-2h-3V6h3"/></svg>');--magiclink-github-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 2A10 10 0 0 0 2 12c0 4.42 2.87 8.17 6.84 9.5.5.08.66-.23.66-.5v-1.69c-2.77.6-3.36-1.34-3.36-1.34-.46-1.16-1.11-1.47-1.11-1.47-.91-.62.07-.6.07-.6 1 .07 1.53 1.03 1.53 1.03.87 1.52 2.34 1.07 2.91.83.09-.65.35-1.09.63-1.34-2.22-.25-4.55-1.11-4.55-4.92 0-1.11.38-2 1.03-2.71-.1-.25-.45-1.29.1-2.64 0 0 .84-.27 2.75 1.02.79-.22 1.65-.33 2.5-.33.85 0 1.71.11 2.5.33 1.91-1.29 2.75-1.02 2.75-1.02.55 1.35.2 2.39.1 2.64.65.71 1.03 1.6 1.03 
2.71 0 3.82-2.34 4.66-4.57 4.91.36.31.69.92.69 1.85V21c0 .27.16.59.67.5C19.14 20.16 22 16.42 22 12A10 10 0 0 0 12 2Z"/></svg>');--magiclink-bitbucket-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M2.65 3C2.3 3 2 3.3 2 3.65v.12l2.73 16.5c.07.42.43.73.85.73h13.05c.31 0 .59-.22.64-.54L22 3.77a.643.643 0 0 0-.54-.73c-.03-.01-.07-.01-.11-.01L2.65 3M14.1 14.95H9.94L8.81 9.07h6.3l-1.01 5.88Z"/></svg>');--magiclink-gitlab-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m21.94 13.11-1.05-3.22c0-.03-.01-.06-.02-.09l-2.11-6.48a.859.859 0 0 0-.8-.57c-.36 0-.68.25-.79.58l-2 6.17H8.84L6.83 3.33a.851.851 0 0 0-.79-.58c-.37 0-.69.25-.8.58L3.13 9.82v.01l-1.07 3.28c-.16.5.01 1.04.44 1.34l9.22 6.71c.17.12.39.12.56-.01l9.22-6.7c.43-.3.6-.84.44-1.34M8.15 10.45l2.57 7.91-6.17-7.91m8.73 7.92 2.47-7.59.1-.33h3.61l-5.59 7.16m4.1-13.67 1.81 5.56h-3.62m-1.3.95-1.79 5.51L12 19.24l-2.86-8.79M6.03 3.94 7.84 9.5H4.23m-1.18 4.19c-.09-.07-.13-.19-.09-.29l.79-2.43 5.82 7.45m11.38-4.73-6.51 4.73.02-.03 5.79-7.42.79 2.43c.04.1 0 .22-.09.29"/></svg>');--magiclink-commit-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M16.944 11h4.306a.75.75 0 0 1 0 1.5h-4.306a5.001 5.001 0 0 1-9.888 0H2.75a.75.75 0 0 1 0-1.5h4.306a5.001 5.001 0 0 1 9.888 0Zm-1.444.75a3.5 3.5 0 1 0-7 0 3.5 3.5 0 0 0 7 0Z"/></svg>');--magiclink-compare-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12.5 6.75a.75.75 0 0 0-1.5 0V9H8.75a.75.75 0 0 0 0 1.5H11v2.25a.75.75 0 0 0 1.5 0V10.5h2.25a.75.75 0 0 0 0-1.5H12.5V6.75ZM8.75 16a.75.75 0 0 0 0 1.5h6a.75.75 0 0 0 0-1.5h-6Z"/><path d="M5 1h9.982a2 2 0 0 1 1.414.586l4.018 4.018A2 2 0 0 1 21 7.018V21a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V3a2 2 0 0 1 2-2Zm-.5 2v18a.5.5 0 0 0 .5.5h14a.5.5 0 0 0 .5-.5V7.018a.5.5 0 0 0-.146-.354l-4.018-4.018a.5.5 0 0 
0-.354-.146H5a.5.5 0 0 0-.5.5Z"/></svg>');--magiclink-pull-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M16 19.25a3.25 3.25 0 1 1 6.5 0 3.25 3.25 0 0 1-6.5 0Zm-14.5 0a3.25 3.25 0 1 1 6.5 0 3.25 3.25 0 0 1-6.5 0Zm0-14.5a3.25 3.25 0 1 1 6.5 0 3.25 3.25 0 0 1-6.5 0ZM4.75 3a1.75 1.75 0 1 0 .001 3.501A1.75 1.75 0 0 0 4.75 3Zm0 14.5a1.75 1.75 0 1 0 .001 3.501A1.75 1.75 0 0 0 4.75 17.5Zm14.5 0a1.75 1.75 0 1 0 .001 3.501 1.75 1.75 0 0 0-.001-3.501Z"/><path d="M13.405 1.72a.75.75 0 0 1 0 1.06L12.185 4h4.065A3.75 3.75 0 0 1 20 7.75v8.75a.75.75 0 0 1-1.5 0V7.75a2.25 2.25 0 0 0-2.25-2.25h-4.064l1.22 1.22a.75.75 0 0 1-1.061 1.06l-2.5-2.5a.75.75 0 0 1 0-1.06l2.5-2.5a.75.75 0 0 1 1.06 0ZM4.75 7.25A.75.75 0 0 1 5.5 8v8A.75.75 0 0 1 4 16V8a.75.75 0 0 1 .75-.75Z"/></svg>');--magiclink-issue-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 1c6.075 0 11 4.925 11 11s-4.925 11-11 11S1 18.075 1 12 5.925 1 12 1ZM2.5 12a9.5 9.5 0 0 0 9.5 9.5 9.5 9.5 0 0 0 9.5-9.5A9.5 9.5 0 0 0 12 2.5 9.5 9.5 0 0 0 2.5 12Zm9.5 2a2 2 0 1 1-.001-3.999A2 2 0 0 1 12 14Z"/></svg>');--magiclink-discussion-icon:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z"/><path d="M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 
.25-.25v-9.5Z"/></svg>')}.md-typeset a[href^="mailto:"]:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-email-icon);mask-image:var(--magiclink-email-icon)}.md-typeset .magiclink-commit:not(.magiclink-ignore),.md-typeset .magiclink-compare:not(.magiclink-ignore),.md-typeset .magiclink-discussion:not(.magiclink-ignore),.md-typeset .magiclink-issue:not(.magiclink-ignore),.md-typeset .magiclink-pull:not(.magiclink-ignore),.md-typeset .magiclink-repository:not(.magiclink-ignore),.md-typeset a[href^="mailto:"]:not(.magiclink-ignore){position:relative;padding-left:1.375em}.md-typeset .magiclink-commit:not(.magiclink-ignore)::before,.md-typeset .magiclink-compare:not(.magiclink-ignore)::before,.md-typeset .magiclink-discussion:not(.magiclink-ignore)::before,.md-typeset .magiclink-issue:not(.magiclink-ignore)::before,.md-typeset .magiclink-pull:not(.magiclink-ignore)::before,.md-typeset .magiclink-repository:not(.magiclink-ignore)::before,.md-typeset a[href^="mailto:"]:not(.magiclink-ignore)::before{position:absolute;top:0;left:0;display:block;box-sizing:border-box;width:1.25em;height:1.25em;background-color:var(--md-typeset-a-color);background-size:1.25em;transition:background-color 125ms;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;content:""}.md-typeset .magiclink-commit:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-compare:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-discussion:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-issue:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-pull:not(.magiclink-ignore):hover::before,.md-typeset .magiclink-repository:not(.magiclink-ignore):hover::before,.md-typeset a[href^="mailto:"]:not(.magiclink-ignore):hover::before{background-color:var(--md-accent-fg-color)}.md-typeset 
.magiclink-commit:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-commit-icon);mask-image:var(--magiclink-commit-icon)}.md-typeset .magiclink-compare:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-compare-icon);mask-image:var(--magiclink-compare-icon)}.md-typeset .magiclink-pull:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-pull-icon);mask-image:var(--magiclink-pull-icon)}.md-typeset .magiclink-issue:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-issue-icon);mask-image:var(--magiclink-issue-icon)}.md-typeset .magiclink-discussion:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-discussion-icon);mask-image:var(--magiclink-discussion-icon)}.md-typeset .magiclink-repository.magiclink-github:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-github-icon);mask-image:var(--magiclink-github-icon)}.md-typeset .magiclink-repository.magiclink-gitlab:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-gitlab-icon);mask-image:var(--magiclink-gitlab-icon)}.md-typeset .magiclink-repository.magiclink-bitbucket:not(.magiclink-ignore)::before{-webkit-mask-image:var(--magiclink-bitbucket-icon);mask-image:var(--magiclink-bitbucket-icon)}.md-typeset mark:not(.critic){box-shadow:none}.md-typeset .progress-label{position:absolute;width:100%;margin:0;color:var(--md-text-color);font-weight:700;line-height:1.4rem;white-space:nowrap;text-align:center;text-shadow:-.0625em -.0625em .375em var(--md-default-bg-color--light),.0625em -.0625em .375em var(--md-default-bg-color--light),-.0625em .0625em .375em var(--md-default-bg-color--light),.0625em .0625em .375em var(--md-default-bg-color--light)}.md-typeset .progress-bar{float:left;height:1.2rem;background-color:#2979ff}.md-typeset .candystripe-animate .progress-bar{animation:animate-stripes 3s linear infinite}.md-typeset .progress{position:relative;display:block;width:100%;height:1.2rem;margin:.5rem 
0;background-color:var(--md-default-fg-color--lightest)}.md-typeset .progress.thin{height:.4rem;margin-top:.9rem}.md-typeset .progress.thin .progress-label{margin-top:-.4rem}.md-typeset .progress.thin .progress-bar{height:.4rem}.md-typeset .progress.candystripe .progress-bar{background-image:linear-gradient(135deg,var(--md-progress-stripe) 27%,transparent 27%,transparent 52%,var(--md-progress-stripe) 52%,var(--md-progress-stripe) 77%,transparent 77%,transparent);background-size:2rem 2rem}.md-typeset .progress-100plus .progress-bar{background-color:var(--md-progress-100)}.md-typeset .progress-80plus .progress-bar{background-color:var(--md-progress-80)}.md-typeset .progress-60plus .progress-bar{background-color:var(--md-progress-60)}.md-typeset .progress-40plus .progress-bar{background-color:var(--md-progress-40)}.md-typeset .progress-20plus .progress-bar{background-color:var(--md-progress-20)}.md-typeset .progress-0plus .progress-bar{background-color:var(--md-progress-0)}@keyframes animate-stripes{0%{background-position:0 0}100%{background-position:6rem 0}}[data-md-color-scheme=dracula] .md-typeset .tabbed-set>.tabbed-labels{box-shadow:0 -.05rem var(--md-default-fg-color--lighter) inset}.md-typeset .tabbed-alternate.tabbed-set .tabbed-control{width:2rem}.md-typeset .tabbed-alternate.tabbed-set .tabbed-control[hidden]{width:1.2rem;opacity:0}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block{padding:0 .6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilite:only-child,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilitetable:only-child,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlight:only-child,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlighttable:only-child,.md-typeset 
.tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>pre:only-child{margin-right:-1.2rem;margin-left:-1.2rem;padding-right:.6rem;padding-left:.6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilite:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.codehilitetable:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlight:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.highlighttable:only-child span.filename,.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>pre:only-child span.filename{margin-top:0}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.collapse-code:only-child{margin-top:0;margin-right:-1.2rem;margin-left:-1.2rem;padding-right:.6rem;padding-left:.6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>.collapse-code:only-child>.code-footer{left:.6rem}.md-typeset .tabbed-alternate.tabbed-set>.tabbed-content>.tabbed-block>diagram-div:only-child{margin-right:-1.2rem;margin-left:-1.2rem;padding-right:.6rem;padding-left:.6rem}.js .md-typeset .tabbed-labels::before{background-color:var(--md-accent-fg-color)}[data-md-color-scheme=dracula] .md-typeset table:not([class]){box-shadow:var(--md-shadow-z2)}[data-md-color-scheme=dracula] .md-typeset table:not([class]) tr:hover{background-color:rgba(0,0,0,.08)}[data-md-color-scheme=dracula] .md-typeset table:not([class]) th{color:var(--md-text-color);background-color:var(--md-default-bg-color--ultra-dark);border-bottom:.05rem solid var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-typeset table:not([class]) td{border-top:.05rem solid var(--md-default-fg-color--lighter)}[data-md-color-scheme=dracula] .md-typeset .task-list-control .task-list-indicator::before{background-color:var(--md-default-fg-color--lighter)}[data-md-color-scheme=dracula] .md-typeset 
.task-list-control [type=checkbox]:checked+.task-list-indicator::before{background-color:#51f97b}.md-typeset .headerlink{width:1em;height:1em;vertical-align:middle;background-color:var(--md-default-fg-color--lighter);background-size:1em;-webkit-mask-size:1em;mask-size:1em;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;visibility:visible;-webkit-mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1M8 13h8v-2H8v2m9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1 0 1.71-1.39 3.1-3.1 3.1h-4V17h4a5 5 0 0 0 5-5 5 5 0 0 0-5-5Z"/></svg>');mask-image:url('data:image/svg+xml;charset=utf-8,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3.9 12c0-1.71 1.39-3.1 3.1-3.1h4V7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1M8 13h8v-2H8v2m9-6h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1 0 1.71-1.39 3.1-3.1 3.1h-4V17h4a5 5 0 0 0 5-5 5 5 0 0 0-5-5Z"/></svg>')}.md-typeset .headerlink:hover,.md-typeset [id]:target .headerlink{background-color:var(--md-accent-fg-color)}diagram-div{overflow:auto}html{background-color:transparent}[data-md-component=announce] .twemoji{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula]{--md-text-color:var(--md-default-fg-color);background-color:var(--md-default-bg-color);--md-footer-bg-color:transparent;--md-footer-bg-color--dark:var(--md-default-bg-color--darkest);--md-header-fg-color:var(--md-text-color);--md-header-bg-color:var(--md-default-bg-color--darkest)}[data-md-color-scheme=dracula] .md-header{color:var(--md-text-color);background-color:var(--md-header-bg-color);border-bottom:.05rem solid var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-header[data-md-state=shadow]{box-shadow:0 0 .2rem rgba(0,0,0,.15),0 0 .2rem .4rem rgba(0,0,0,.2)}[data-md-color-scheme=dracula] 
.md-top{background-color:var(--md-default-bg-color--dark)}[data-md-color-scheme=dracula] .md-top:hover{background-color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-tabs{color:var(--md-text-color);background-color:var(--md-primary-fg-color--transparent)}[data-md-color-scheme=dracula] .md-tabs__link--active{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-tabs__link:hover{color:var(--md-accent-fg-color)}[data-md-color-scheme=dracula] .md-hero{color:var(--md-text-color);background-color:var(--md-primary-fg-color--transparent)}[data-md-color-scheme=dracula] .md-nav__source{color:var(--md-text-color)}[data-md-color-scheme=dracula] .md-nav__link[data-md-state=blur]{color:var(--md-default-fg-color--light)}[data-md-color-scheme=dracula] .md-nav__item .md-nav__link--active{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-nav__link:focus,[data-md-color-scheme=dracula] .md-nav__link:hover{color:var(--md-accent-fg-color)}[data-md-color-scheme=dracula] .md-search__input{color:var(--md-text-color);background-color:var(--md-accent-bg-color--light)}[data-md-color-scheme=dracula] .md-search__input:hover{background-color:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] .md-search__input~.md-search__icon{color:var(--md-text-color)}[data-md-color-scheme=dracula] .md-search__input::-moz-placeholder{color:var(--md-default-fg-color--light)}[data-md-color-scheme=dracula] .md-search__input::placeholder{color:var(--md-default-fg-color--light)}[data-md-color-scheme=dracula] [data-md-toggle=search]:checked~.md-header .md-search__input{background-color:transparent}[data-md-color-scheme=dracula] .md-search__suggest{color:var(--md-default-autocomplete-fg-color)}[data-md-color-scheme=dracula] .md-overlay,[data-md-color-scheme=dracula] .md-search__overlay{background-color:var(--md-default-bg-color--light)}[data-md-color-scheme=dracula] .md-footer-nav__direction{color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] 
.md-footer-meta{border-top:.05rem solid var(--md-primary-fg-color)}[data-md-color-scheme=dracula] [data-md-component=announce]{background-color:var(--md-default-bg-color--ultra-dark)}.md-typeset h5{color:var(--md-text-color);text-transform:none}.md-search__scrollwrap,.md-sidebar__scrollwrap,.md-typeset diagram-div,.md-typeset div.arithmatex,.md-typeset div.diagram,.md-typeset div.mermaid,.md-typeset mermaid-div,.md-typeset pre.arithmatex,.md-typeset pre>code,.md-typeset__scrollwrap{scrollbar-color:var(--md-default-fg-color--lighter) transparent;scrollbar-width:thin}.md-search__scrollwrap::-webkit-scrollbar,.md-sidebar__scrollwrap::-webkit-scrollbar,.md-typeset diagram-div::-webkit-scrollbar,.md-typeset div.arithmatex::-webkit-scrollbar,.md-typeset div.diagram::-webkit-scrollbar,.md-typeset div.mermaid::-webkit-scrollbar,.md-typeset mermaid-div::-webkit-scrollbar,.md-typeset pre.arithmatex::-webkit-scrollbar,.md-typeset pre>code::-webkit-scrollbar,.md-typeset__scrollwrap::-webkit-scrollbar{width:.2rem;height:.2rem}.md-search__scrollwrap::-webkit-scrollbar-corner,.md-sidebar__scrollwrap::-webkit-scrollbar-corner,.md-typeset diagram-div::-webkit-scrollbar-corner,.md-typeset div.arithmatex::-webkit-scrollbar-corner,.md-typeset div.diagram::-webkit-scrollbar-corner,.md-typeset div.mermaid::-webkit-scrollbar-corner,.md-typeset mermaid-div::-webkit-scrollbar-corner,.md-typeset pre.arithmatex::-webkit-scrollbar-corner,.md-typeset pre>code::-webkit-scrollbar-corner,.md-typeset__scrollwrap::-webkit-scrollbar-corner{background-color:transparent}.md-search__scrollwrap::-webkit-scrollbar-thumb,.md-sidebar__scrollwrap::-webkit-scrollbar-thumb,.md-typeset diagram-div::-webkit-scrollbar-thumb,.md-typeset div.arithmatex::-webkit-scrollbar-thumb,.md-typeset div.diagram::-webkit-scrollbar-thumb,.md-typeset div.mermaid::-webkit-scrollbar-thumb,.md-typeset mermaid-div::-webkit-scrollbar-thumb,.md-typeset pre.arithmatex::-webkit-scrollbar-thumb,.md-typeset 
pre>code::-webkit-scrollbar-thumb,.md-typeset__scrollwrap::-webkit-scrollbar-thumb{background-color:var(--md-default-fg-color--lighter)}.md-search__scrollwrap::-webkit-scrollbar-thumb:hover,.md-sidebar__scrollwrap::-webkit-scrollbar-thumb:hover,.md-typeset diagram-div::-webkit-scrollbar-thumb:hover,.md-typeset div.arithmatex::-webkit-scrollbar-thumb:hover,.md-typeset div.diagram::-webkit-scrollbar-thumb:hover,.md-typeset div.mermaid::-webkit-scrollbar-thumb:hover,.md-typeset mermaid-div::-webkit-scrollbar-thumb:hover,.md-typeset pre.arithmatex::-webkit-scrollbar-thumb:hover,.md-typeset pre>code::-webkit-scrollbar-thumb:hover,.md-typeset__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:var(--md-accent-fg-color)}.md-search__scrollwrap:hover,.md-sidebar__scrollwrap:hover,.md-typeset diagram-div:hover,.md-typeset div.arithmatex:hover,.md-typeset div.diagram:hover,.md-typeset div.mermaid:hover,.md-typeset mermaid-div:hover,.md-typeset pre.arithmatex:hover,.md-typeset pre>code:hover,.md-typeset__scrollwrap:hover{scrollbar-color:var(--md-accent-fg-color) transparent}@media screen and (max-width:59.9375em){.md-header-nav__scheme{padding-right:0}label[for=__search]{padding-left:0}[data-md-color-scheme=dracula] .md-nav__source{color:var(--md-text-color);background-color:var(--md-primary-fg-color--transparent)}[data-md-color-scheme=dracula] .md-nav .md-nav__title{color:var(--md-text-color);background-color:var(--md-header-bg-color);border-bottom:.05rem solid var(--md-primary-fg-color)}}@media screen and (max-width:44.9375em){.md-typeset>diagram-div{margin-right:-.8rem;margin-left:-.8rem}.md-typeset>.collapse-code{margin-right:-.8rem;margin-left:-.8rem}.md-typeset>.collapse-code label.collapse{left:.8rem}[dir=ltr] .md-content__inner>.tabbed-set .tabbed-labels{padding-left:0}.md-content__inner>.tabbed-set .tabbed-labels{max-width:100%;margin:0;padding-inline-start:0;scroll-padding-inline-start:0}.md-content__inner>.tabbed-set 
.tabbed-labels::after{padding-inline-end:0;content:none}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--prev{margin-inline-start:0;padding-inline-start:0}.md-content__inner>.tabbed-set .tabbed-labels~.tabbed-control--next{margin-inline-end:0;padding-inline-end:0}}@media screen and (max-width:76.1875em){[data-md-color-scheme=dracula] .md-nav--primary .md-nav__item--active>.md-nav__link:not(:hover){color:var(--md-primary-fg-color)}[data-md-color-scheme=dracula] .md-nav--primary .md-nav__title{color:var(--md-text-color);background-color:var(--md-header-bg-color);border-bottom:.05rem solid var(--md-primary-fg-color)}}
/*# sourceMappingURL=extra-fb5a2a1c86.css.map */


================================================
FILE: docs/theme/assets/pymdownx-extras/extra-loader-MCFnu0Wd.js
================================================
/*
 * Diagram and math loader for the docs pages.
 *
 * NOTE(review): this looks like build output (type-of/class helpers and a
 * generator runtime are inlined ahead of the real logic) - presumably it is
 * regenerated from sources that are not in this file; confirm before editing
 * it by hand.
 *
 * What the code below does:
 * - Defines an HTMLElement subclass with an open shadow root and a small
 *   style sheet, and registers it as the custom element "diagram-div" unless
 *   that name is already defined.
 * - y(className): async. Calls mermaid.mermaidAPI.globalReset(), reads the
 *   scheme from the first element carrying data-md-color-scheme (falling
 *   back to "default" if that lookup throws), then initialises mermaid with
 *   mermaidConfig[scheme], else mermaidConfig.default, else a built-in
 *   default object (also used when mermaidConfig is undefined). For every
 *   "pre.<className>" and every existing "diagram-div" it renders the
 *   non-blank text node found under the element's <code> child with
 *   mermaid.render() into a temporary hidden div appended to <body>, places
 *   the SVG in a new "diagram-div" inserted before the element, hides the
 *   source <pre> and moves it into that shadow root. An exception while
 *   rendering one element is caught and discarded; the temporary div is
 *   removed either way and the loop continues.
 * - m(className, engine): for "katex", renders each ".<className>" element
 *   whose text is wrapped in \( \) (inline) or \[ \] (display mode); for
 *   "mathjax", clears MathJax's caches and calls typesetPromise().
 * - A MutationObserver on <body>, filtered to data-md-color-scheme, stores
 *   the new scheme (or "default") in localStorage and re-runs y("diagram")
 *   when mermaid is loaded.
 * - p() wires everything up and is run via window.document$.subscribe when
 *   that exists, otherwise on DOMContentLoaded. Rendering failures are
 *   reported with console.log.
 *
 * NOTE(review): the temporary div is given style.display = "display", which
 * is not a CSS display keyword - presumably the assignment has no effect;
 * confirm whether "block" was intended.
 * NOTE(review): the text extractor calls tagName.toLowerCase() on every
 * child node of the <pre>; this assumes each child is an element - verify
 * what happens when the <pre> contains bare text nodes.
 */
function _typeof(t){return _typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},_typeof(t)}!function(){"use strict";function t(){t=function(){return r};var e,r={},n=Object.prototype,o=n.hasOwnProperty,i=Object.defineProperty||function(t,e,r){t[e]=r.value},a="function"==typeof Symbol?Symbol:{},c=a.iterator||"@@iterator",u=a.asyncIterator||"@@asyncIterator",l=a.toStringTag||"@@toStringTag";function f(t,e,r){return Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}),t[e]}try{f({},"")}catch(e){f=function(t,e,r){return t[e]=r}}function s(t,e,r,n){var o=e&&e.prototype instanceof g?e:g,a=Object.create(o.prototype),c=new N(n||[]);return i(a,"_invoke",{value:S(t,r,c)}),a}function h(t,e,r){try{return{type:"normal",arg:t.call(e,r)}}catch(t){return{type:"throw",arg:t}}}r.wrap=s;var d="suspendedStart",p="suspendedYield",y="executing",m="completed",v={};function g(){}function b(){}function w(){}var x={};f(x,c,(function(){return this}));var E=Object.getPrototypeOf,L=E&&E(E(C([])));L&&L!==n&&o.call(L,c)&&(x=L);var O=w.prototype=g.prototype=Object.create(x);function _(t){["next","throw","return"].forEach((function(e){f(t,e,(function(t){return this._invoke(e,t)}))}))}function j(t,e){function r(n,i,a,c){var u=h(t[n],t,i);if("throw"!==u.type){var l=u.arg,f=l.value;return f&&"object"==_typeof(f)&&o.call(f,"__await")?e.resolve(f.__await).then((function(t){r("next",t,a,c)}),(function(t){r("throw",t,a,c)})):e.resolve(f).then((function(t){l.value=t,a(l)}),(function(t){return r("throw",t,a,c)}))}c(u.arg)}var n;i(this,"_invoke",{value:function(t,o){function i(){return new e((function(e,n){r(t,o,e,n)}))}return n=n?n.then(i,i):i()}})}function S(t,r,n){var o=d;return function(i,a){if(o===y)throw new Error("Generator is already running");if(o===m){if("throw"===i)throw 
a;return{value:e,done:!0}}for(n.method=i,n.arg=a;;){var c=n.delegate;if(c){var u=P(c,n);if(u){if(u===v)continue;return u}}if("next"===n.method)n.sent=n._sent=n.arg;else if("throw"===n.method){if(o===d)throw o=m,n.arg;n.dispatchException(n.arg)}else"return"===n.method&&n.abrupt("return",n.arg);o=y;var l=h(t,r,n);if("normal"===l.type){if(o=n.done?m:p,l.arg===v)continue;return{value:l.arg,done:n.done}}"throw"===l.type&&(o=m,n.method="throw",n.arg=l.arg)}}}function P(t,r){var n=r.method,o=t.iterator[n];if(o===e)return r.delegate=null,"throw"===n&&t.iterator.return&&(r.method="return",r.arg=e,P(t,r),"throw"===r.method)||"return"!==n&&(r.method="throw",r.arg=new TypeError("The iterator does not provide a '"+n+"' method")),v;var i=h(o,t.iterator,r.arg);if("throw"===i.type)return r.method="throw",r.arg=i.arg,r.delegate=null,v;var a=i.arg;return a?a.done?(r[t.resultName]=a.value,r.next=t.nextLoc,"return"!==r.method&&(r.method="next",r.arg=e),r.delegate=null,v):a:(r.method="throw",r.arg=new TypeError("iterator result is not an object"),r.delegate=null,v)}function k(t){var e={tryLoc:t[0]};1 in t&&(e.catchLoc=t[1]),2 in t&&(e.finallyLoc=t[2],e.afterLoc=t[3]),this.tryEntries.push(e)}function M(t){var e=t.completion||{};e.type="normal",delete e.arg,t.completion=e}function N(t){this.tryEntries=[{tryLoc:"root"}],t.forEach(k,this),this.reset(!0)}function C(t){if(t||""===t){var r=t[c];if(r)return r.call(t);if("function"==typeof t.next)return t;if(!isNaN(t.length)){var n=-1,i=function r(){for(;++n<t.length;)if(o.call(t,n))return r.value=t[n],r.done=!1,r;return r.value=e,r.done=!0,r};return i.next=i}}throw new TypeError(_typeof(t)+" is not iterable")}return b.prototype=w,i(O,"constructor",{value:w,configurable:!0}),i(w,"constructor",{value:b,configurable:!0}),b.displayName=f(w,l,"GeneratorFunction"),r.isGeneratorFunction=function(t){var e="function"==typeof t&&t.constructor;return!!e&&(e===b||"GeneratorFunction"===(e.displayName||e.name))},r.mark=function(t){return 
Object.setPrototypeOf?Object.setPrototypeOf(t,w):(t.__proto__=w,f(t,l,"GeneratorFunction")),t.prototype=Object.create(O),t},r.awrap=function(t){return{__await:t}},_(j.prototype),f(j.prototype,u,(function(){return this})),r.AsyncIterator=j,r.async=function(t,e,n,o,i){void 0===i&&(i=Promise);var a=new j(s(t,e,n,o),i);return r.isGeneratorFunction(e)?a:a.next().then((function(t){return t.done?t.value:a.next()}))},_(O),f(O,l,"Generator"),f(O,c,(function(){return this})),f(O,"toString",(function(){return"[object Generator]"})),r.keys=function(t){var e=Object(t),r=[];for(var n in e)r.push(n);return r.reverse(),function t(){for(;r.length;){var n=r.pop();if(n in e)return t.value=n,t.done=!1,t}return t.done=!0,t}},r.values=C,N.prototype={constructor:N,reset:function(t){if(this.prev=0,this.next=0,this.sent=this._sent=e,this.done=!1,this.delegate=null,this.method="next",this.arg=e,this.tryEntries.forEach(M),!t)for(var r in this)"t"===r.charAt(0)&&o.call(this,r)&&!isNaN(+r.slice(1))&&(this[r]=e)},stop:function(){this.done=!0;var t=this.tryEntries[0].completion;if("throw"===t.type)throw t.arg;return this.rval},dispatchException:function(t){if(this.done)throw t;var r=this;function n(n,o){return c.type="throw",c.arg=t,r.next=n,o&&(r.method="next",r.arg=e),!!o}for(var i=this.tryEntries.length-1;i>=0;--i){var a=this.tryEntries[i],c=a.completion;if("root"===a.tryLoc)return n("end");if(a.tryLoc<=this.prev){var u=o.call(a,"catchLoc"),l=o.call(a,"finallyLoc");if(u&&l){if(this.prev<a.catchLoc)return n(a.catchLoc,!0);if(this.prev<a.finallyLoc)return n(a.finallyLoc)}else if(u){if(this.prev<a.catchLoc)return n(a.catchLoc,!0)}else{if(!l)throw new Error("try statement without catch or finally");if(this.prev<a.finallyLoc)return n(a.finallyLoc)}}}},abrupt:function(t,e){for(var r=this.tryEntries.length-1;r>=0;--r){var n=this.tryEntries[r];if(n.tryLoc<=this.prev&&o.call(n,"finallyLoc")&&this.prev<n.finallyLoc){var 
i=n;break}}i&&("break"===t||"continue"===t)&&i.tryLoc<=e&&e<=i.finallyLoc&&(i=null);var a=i?i.completion:{};return a.type=t,a.arg=e,i?(this.method="next",this.next=i.finallyLoc,v):this.complete(a)},complete:function(t,e){if("throw"===t.type)throw t.arg;return"break"===t.type||"continue"===t.type?this.next=t.arg:"return"===t.type?(this.rval=this.arg=t.arg,this.method="return",this.next="end"):"normal"===t.type&&e&&(this.next=e),v},finish:function(t){for(var e=this.tryEntries.length-1;e>=0;--e){var r=this.tryEntries[e];if(r.finallyLoc===t)return this.complete(r.completion,r.afterLoc),M(r),v}},catch:function(t){for(var e=this.tryEntries.length-1;e>=0;--e){var r=this.tryEntries[e];if(r.tryLoc===t){var n=r.completion;if("throw"===n.type){var o=n.arg;M(r)}return o}}throw new Error("illegal catch attempt")},delegateYield:function(t,r,n){return this.delegate={iterator:C(t),resultName:r,nextLoc:n},"next"===this.method&&(this.arg=e),v}},r}function e(t,e,r,n,o,i,a){try{var c=t[i](a),u=c.value}catch(t){return void r(t)}c.done?e(u):Promise.resolve(u).then(n,o)}function r(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}function n(t,e){for(var r=0;r<e.length;r++){var n=e[r];n.enumerable=n.enumerable||!1,n.configurable=!0,"value"in n&&(n.writable=!0),Object.defineProperty(t,(o=n.key,i=void 0,i=function(t,e){if("object"!==_typeof(t)||null===t)return t;var r=t[Symbol.toPrimitive];if(void 0!==r){var n=r.call(t,e||"default");if("object"!==_typeof(n))return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===e?String:Number)(t)}(o,"string"),"symbol"===_typeof(i)?i:String(i)),n)}var o,i}function o(t,e){if("function"!=typeof e&&null!==e)throw new TypeError("Super expression must either be null or a function");t.prototype=Object.create(e&&e.prototype,{constructor:{value:t,writable:!0,configurable:!0}}),Object.defineProperty(t,"prototype",{writable:!1}),e&&a(t,e)}function i(t){return 
i=Object.setPrototypeOf?Object.getPrototypeOf.bind():function(t){return t.__proto__||Object.getPrototypeOf(t)},i(t)}function a(t,e){return a=Object.setPrototypeOf?Object.setPrototypeOf.bind():function(t,e){return t.__proto__=e,t},a(t,e)}function c(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],(function(){}))),!0}catch(t){return!1}}function u(t,e,r){return u=c()?Reflect.construct.bind():function(t,e,r){var n=[null];n.push.apply(n,e);var o=new(Function.bind.apply(t,n));return r&&a(o,r.prototype),o},u.apply(null,arguments)}function l(t){var e="function"==typeof Map?new Map:void 0;return l=function(t){if(null===t||!function(t){try{return-1!==Function.toString.call(t).indexOf("[native code]")}catch(e){return"function"==typeof t}}(t))return t;if("function"!=typeof t)throw new TypeError("Super expression must either be null or a function");if(void 0!==e){if(e.has(t))return e.get(t);e.set(t,r)}function r(){return u(t,arguments,i(this).constructor)}return r.prototype=Object.create(t.prototype,{constructor:{value:r,enumerable:!1,writable:!0,configurable:!0}}),a(r,t)},l(t)}function f(t,e){if(e&&("object"===_typeof(e)||"function"==typeof e))return e;if(void 0!==e)throw new TypeError("Derived constructors may only return object or undefined");return function(t){if(void 0===t)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return t}(t)}var s,h,d,p,y=function(){var a,u=(a=t().mark((function e(a){var u,s,h,d,p,y,m,v,g,b,w,x,E,L,O,_;return t().wrap((function(t){for(;;)switch(t.prev=t.next){case 0:u=function(t){o(d,t);var e,a,u,l,s,h=(e=d,a=c(),function(){var t,r=i(e);if(a){var n=i(this).constructor;t=Reflect.construct(r,arguments,n)}else t=r.apply(this,arguments);return f(this,t)});function d(){var t;r(this,d);var 
e=(t=h.call(this)).attachShadow({mode:"open"}),n=document.createElement("style");return n.textContent="\n      :host {\n        display: block;\n        line-height: initial;\n        font-size: 16px;\n      }\n      div.diagram {\n        margin: 0;\n        overflow: visible;\n      }",e.appendChild(n),t}return u=d,l&&n(u.prototype,l),s&&n(u,s),Object.defineProperty(u,"prototype",{writable:!1}),u}(l(HTMLElement)),void 0===customElements.get("diagram-div")&&customElements.define("diagram-div",u),s=function(t){for(var e="",r=0;r<t.childNodes.length;r++){var n=t.childNodes[r];if("code"===n.tagName.toLowerCase())for(var o=0;o<n.childNodes.length;o++){var i=n.childNodes[o];if("#text"===i.nodeName&&!/^\s*$/.test(i.nodeValue)){e=i.nodeValue;break}}}return e},h={startOnLoad:!1,theme:"default",flowchart:{htmlLabels:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:"14px",actorFontSize:"14px",messageFontSize:"16px"}},mermaid.mermaidAPI.globalReset(),d=null;try{d=document.querySelector("[data-md-color-scheme]").getAttribute("data-md-color-scheme")}catch(t){d="default"}p="undefined"==typeof mermaidConfig?h:mermaidConfig[d]||mermaidConfig.default||h,mermaid.initialize(p),y=document.querySelectorAll("pre.".concat(a,", diagram-div")),m=document.querySelector("html body"),v=0;case 12:if(!(v<y.length)){t.next=47;break}return g=y[v],b="diagram-div"===g.tagName.toLowerCase()?g.shadowRoot.querySelector("pre.".concat(a)):g,(w=document.createElement("div")).style.visibility="hidden",w.style.display="display",w.style.padding="0",w.style.margin="0",w.style.lineHeight="initial",w.style.fontSize="16px",m.appendChild(w),t.prev=23,t.next=26,mermaid.render("_diagram_".concat(v),s(b),w);case 
26:x=t.sent,E=x.svg,L=x.bindFunctions,(O=document.createElement("div")).className=a,O.innerHTML=E,L&&L(O),(_=document.createElement("diagram-div")).shadowRoot.appendChild(O),g.parentNode.insertBefore(_,g),b.style.display="none",_.shadowRoot.appendChild(b),b!==g&&g.parentNode.removeChild(g),t.next=43;break;case 41:t.prev=41,t.t0=t.catch(23);case 43:m.contains(w)&&m.removeChild(w);case 44:v++,t.next=12;break;case 47:case"end":return t.stop()}}),e,null,[[23,41]])})),function(){var t=this,r=arguments;return new Promise((function(n,o){var i=a.apply(t,r);function c(t){e(i,n,o,c,u,"next",t)}function u(t){e(i,n,o,c,u,"throw",t)}c(void 0)}))});return function(t){return u.apply(this,arguments)}}(),m=function(t,e){if("katex"===e)for(var r=document.querySelectorAll(".".concat(t)),n=0;n<r.length;n++){var o=r[n].textContent||r[n].innerText;o.startsWith("\\(")&&o.endsWith("\\)")?katex.render(o.slice(2,-2),r[n],{displayMode:!1}):o.startsWith("\\[")&&o.endsWith("\\]")&&katex.render(o.slice(2,-2),r[n],{displayMode:!0})}else"mathjax"===e&&(MathJax.startup.output.clearCache(),MathJax.typesetClear(),MathJax.texReset(),MathJax.typesetPromise())};s=Promise.resolve(),h=Promise.resolve(),d=new MutationObserver((function(t){t.forEach((function(t){if("attributes"===t.type){var e=t.target.getAttribute("data-md-color-scheme");e||(e="default"),localStorage.setItem("data-md-color-scheme",e),"undefined"!=typeof mermaid&&y("diagram")}}))})),p=function(){d.observe(document.querySelector("body"),{attributeFilter:["data-md-color-scheme"]}),"undefined"!=typeof mermaid&&(s=s.then((function(){y("diagram")})).catch((function(t){console.log("UML loading failed...".concat(t))}))),"undefined"!=typeof katex?h=h.then((function(){m("arithmatex","katex")})).catch((function(t){console.log("Math loading failed...".concat(t))})):"undefined"!=typeof MathJax&&"typesetPromise"in MathJax&&(h=h.then((function(){m("arithmatex","mathjax")})).catch((function(t){console.log("Math loading 
failed...".concat(t))})))},window.document$?window.document$.subscribe(p):document.addEventListener("DOMContentLoaded",p)}();
//# sourceMappingURL=extra-loader-MCFnu0Wd.js.map


================================================
FILE: docs/theme/assets/pymdownx-extras/material-extra-3rdparty-E-i8w1WA.js
================================================
/*
 * Default configuration for the third-party renderers used by the docs.
 *
 * - window.MathJax is assigned unless a "mathjaxConfig" property already
 *   exists on window. It sets \( \) as the inline and \[ \] as the display
 *   TeX delimiters, uses AMS-style tags placed on the right, and restricts
 *   processing to elements with class "arithmatex" (ignoreHtmlClass ".*",
 *   processHtmlClass "arithmatex").
 * - window.mermaidConfig is assigned unless a "mermaidConfig" property
 *   already exists on window. It holds one Mermaid config object per key:
 *     dracula - theme "base" plus the themeCSS / themeVariables overrides
 *               spelled out below
 *     default - theme "default"
 *     slate   - theme "dark"
 *   Every entry sets startOnLoad to false, flowchart.htmlLabels to false
 *   and useMaxWidth to false for each diagram type it lists.
 *   NOTE(review): the keys presumably correspond to data-md-color-scheme
 *   values chosen by whichever script consumes mermaidConfig - confirm
 *   against that consumer.
 *
 * The themeCSS value is one long string literal; its internal whitespace is
 * part of the literal.
 */
!function(){"use strict";"mathjaxConfig"in window||(window.MathJax={tex:{inlineMath:[["\\(","\\)"]],displayMath:[["\\[","\\]"]],processEscapes:!0,processEnvironments:!0,tagSide:"right",tagIndent:".8em",multlineWidth:"85%",tags:"ams"},options:{ignoreHtmlClass:".*",processHtmlClass:"arithmatex"}}),"mermaidConfig"in window||(window.mermaidConfig={dracula:{startOnLoad:!1,theme:"base",themeCSS:"        * {          --drac-page-bg: hsl(233, 15%, 23%);          --drac-white-fg: hsl(60, 30%, 96%);          --drac-purple-fg: hsl(265, 89%, 78%);          --drac-purple-bg: hsl(265, 25%, 39%);          --drac-yellow-fg: hsl(65, 92%, 76%);          --drac-blue-fg: hsl(225, 27%, 51%);        }                /* General */        [id^='_diagram'] {          background-color: var(--drac-page-bg);        }                /* Entity Relationship */        rect.relationshipLabelBox {          opacity: 0.75 !important;          fill: var(--drac-purple-bg) !important;        }        defs marker#ZERO_OR_MORE_END circle {          fill: var(--drac-page-bg) !important;          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_MORE_END path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_MORE_START circle{          fill: var(--drac-page-bg) !important;          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_MORE_START path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ONLY_ONE_START path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ONLY_ONE_END path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_ONE_START path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ZERO_OR_ONE_END path {          stroke: var(--drac-purple-fg) !important;        }        defs marker#ONE_OR_MORE_START path {          stroke: var(--drac-purple-fg) !important;        }        
defs marker#ONE_OR_MORE_END path {          stroke: var(--drac-purple-fg) !important;        }                /* Flowchart */        .labelText,        :not(.branchLabel) > .label text {          fill: var(--drac-purple-fg);        }        .edgeLabel text {          fill: var(--drac-purple-fg) !important;        }        .edgeLabel rect {          opacity: 0.75 !important;          fill: var(--drac-purple-bg) !important;        }                .grey rect.label-container {           fill: var(--drac-purple-bg) !important;          stroke: var(--drac-purple-fg) !important;        }         /* Sequence */        line[id^='actor'] {          stroke: var(--drac-blue-fg);        }        .noteText {          fill: var(--drac-yellow-fg);        }                /* Gantt */        .sectionTitle {          fill: var(--drac-purple-fg) !important;        }                .grid .tick line {          stroke: var(--drac-blue-fg) !important;        }                .grid .tick text {          fill: var(--drac-purple-fg);        }                /* Class Diagram */        .statediagram-state rect.divider {          fill: transparent !important;        }                /* State Diagram */        .stateGroup circle[style$=\"fill: black;\"] {          fill: var(--drac-purple-bg) !important;          stroke: var(--drac-purple-bg) !important;        }                .stateGroup circle[style$=\"fill: white;\"] {          fill: var(--drac-purple-bg) !important;          stroke: var(--drac-purple-fg) !important;        }                .stateGroup .composit {          fill: var(--drac-page-bg);        }        /* Pie */        text.slice {          fill: var(--drac-white-fg) !important;        }        /* Git Graph */        .commit-bullets .commit-reverse,        .commit-bullets .commit-merge,         .commit-bullets .commit-highlight-inner {          fill: var(--drac-page-bg) !important;          stroke: var(--drac-page-bg) !important;        }        
",themeVariables:{darkMode:!0,background:"#323443",mainBkg:"#604b7d",textColor:"#bf95f9",lineColor:"#bf95f9",errorBkgColor:"#802c2c",errorTextColor:"#ff5757",primaryColor:"#604b7d",primaryTextColor:"#bf95f9",primaryBorderColor:"#bf95f9",secondaryColor:"#297d3e",secondaryTextColor:"#52fa7c",secondaryBorderColor:"#52fa7c",tertiaryColor:"#303952",tertiaryTextColor:"#6071a4",tertiaryBorderColor:"#6071a4",noteBkgColor:"#797d45",noteTextColor:"#f1fa89",noteBorderColor:"#f1fa89",edgeLabelBackground:"#604b7d",edgeLabelText:"#604b7d",actorLineColor:"#6071a4",activeTaskBkgColor:"#803d63",activeTaskBorderColor:"#ff7ac6",doneTaskBkgColor:"#297d3e",doneTaskBorderColor:"#52fa7c",critBkgColor:"#802c2c",critBorderColor:"#ff5757",taskTextColor:"#bf95f9",taskTextOutsideColor:"#bf95f9",taskTextLightColor:"#bf95f9",sectionBkgColor:"#bf95f9b3",sectionBkgColor2:"#bf95f966",altSectionBkgColor:"#323443",todayLineColor:"#ff7ac6",gridColor:"#6071a4",defaultLinkColor:"#8be8fd",altBackground:"#bf95f9",classText:"#bf95f9",fillType0:"#406080",fillType1:"#46747f",fillType2:"#297d3e",fillType3:"#805c36",fillType4:"#803d63",fillType5:"#604b7d",fillType6:"#802c2c",fillType7:"#797d45",fillType8:"#7c7c79",git0:"#ff5555",git1:"#ffb86c",git2:"#f1fa8c",git3:"#50fa7b",git4:"#8be9fd",git5:"#809fff",git6:"#ff79c6",git7:"#bd93f9",gitInv0:"#ff5555",gitInv1:"#ffb86c",gitInv2:"#f1fa8c",gitInv3:"#50fa7b",gitInv4:"#8be9fd",gitInv5:"#809fff",gitInv6:"#ff79c6",gitInv7:"#bd93f9",gitBranchLabel0:"#323443",gitBranchLabel1:"#323443",gitBranchLabel2:"#323443",gitBranchLabel3:"#323443",gitBranchLabel4:"#323443",gitBranchLabel5:"#323443",gitBranchLabel6:"#323443",gitBranchLabel7:"#323443",commitLabelColor:"#52fa7c",commitLabelBackground:"#297d3e"},flowchart:{htmlLabels:!1,useMaxWidth:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:"14px",actorFontSize:"14px",messageFontSize:"16px"},journey:{useMaxWidth:!1},pie:{useMaxWidth:!1},gantt:{useMaxWidth:!1},gitGraph:{useMaxWidth:!1}},default:{startOnLoad:!1,theme:
"default",flowchart:{htmlLabels:!1,useMaxWidth:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:"14px",actorFontSize:"14px",messageFontSize:"16px"},journey:{useMaxWidth:!1},pie:{useMaxWidth:!1},gantt:{useMaxWidth:!1},gitGraph:{useMaxWidth:!1}},slate:{startOnLoad:!1,theme:"dark",flowchart:{htmlLabels:!1,useMaxWidth:!1},er:{useMaxWidth:!1},sequence:{useMaxWidth:!1,noteFontWeight:"14px",actorFontSize:"14px",messageFontSize:"16px"},journey:{useMaxWidth:!1},pie:{useMaxWidth:!1},gantt:{useMaxWidth:!1},gitGraph:{useMaxWidth:!1}}})}();
//# sourceMappingURL=material-extra-3rdparty-E-i8w1WA.js.map


================================================
FILE: docs/theme/assets/pymdownx-extras/material-extra-theme-TVq-kNRT.js
================================================
/*
 * Colour-scheme bootstrap and toggle for the docs theme.
 *
 * - As soon as <body> is inserted under <html>, the stored scheme and the
 *   "follow the system" flag are read from localStorage and written to the
 *   body's data-md-color-scheme / data-md-prefers-color-scheme attributes.
 * - While the flag is "true", changes of the OS-level dark-mode preference
 *   switch the scheme between "dracula" (dark) and "default" (light).
 * - window.toggleScheme() advances the scheme to the next state (see the
 *   function for the exact order).
 *
 * Written in ES5 (var / function expressions) to match the rest of the
 * bundled theme scripts.
 */
(function () {
  "use strict";

  var SCHEME_KEY = "data-md-color-scheme";
  var PREFERS_KEY = "data-md-prefers-color-scheme";
  var DARK_QUERY = "(prefers-color-scheme: dark)";

  /**
   * Report whether the browser understands the prefers-color-scheme media
   * feature; a query the browser cannot parse is reported back as "not all".
   * @returns {boolean}
   */
  function supportsSystemScheme() {
    return window.matchMedia("(prefers-color-scheme)").media !== "not all";
  }

  /**
   * Scheme name matching the current OS-level preference.
   * @returns {string} "dracula" when dark mode is preferred, else "default".
   */
  function systemScheme() {
    return window.matchMedia(DARK_QUERY).matches ? "dracula" : "default";
  }

  /**
   * React to a change of the OS-level preference. Does nothing unless the
   * stored "follow the system" flag is exactly "true".
   * @param {MediaQueryListEvent|MediaQueryList} event - carries `matches`.
   */
  function onSystemSchemeChange(event) {
    if (localStorage.getItem(PREFERS_KEY) === "true") {
      document
        .querySelector("body")
        .setAttribute(SCHEME_KEY, event.matches ? "dracula" : "default");
    }
  }

  /**
   * Subscribe to OS-level preference changes.
   *
   * MediaQueryList.addListener() is deprecated in favour of the standard
   * "change" event, so addEventListener is used when it exists and
   * addListener remains only as a fallback for engines that lack it.
   */
  function watchSystemScheme() {
    var query = window.matchMedia(DARK_QUERY);
    if (typeof query.addEventListener === "function") {
      query.addEventListener("change", onSystemSchemeChange);
    } else {
      query.addListener(onSystemSchemeChange);
    }
  }

  /**
   * Apply the persisted (or default) scheme to a freshly inserted <body>.
   * @param {Element} body
   */
  function initBody(body) {
    var canFollowSystem = supportsSystemScheme();
    // Nothing stored yet: start in "dracula" and do not follow the system.
    var scheme = localStorage.getItem(SCHEME_KEY) || "dracula";
    var prefers = localStorage.getItem(PREFERS_KEY) || "false";

    if (prefers === "true" && canFollowSystem) {
      scheme = systemScheme();
    } else {
      // Either the flag is off or the browser cannot honour it.
      prefers = "false";
    }

    body.setAttribute(PREFERS_KEY, prefers);
    body.setAttribute(SCHEME_KEY, scheme);

    if (canFollowSystem) {
      watchSystemScheme();
    }
  }

  // Watch <html> so the attributes are set the moment <body> is added.
  new MutationObserver(function (mutations) {
    mutations.forEach(function (mutation) {
      if (mutation.type !== "childList" || !mutation.addedNodes.length) {
        return;
      }
      for (var i = 0; i < mutation.addedNodes.length; i++) {
        var node = mutation.addedNodes[i];
        // nodeType 1 === element; text/comment nodes have no tagName.
        if (node.nodeType === 1 && node.tagName.toLowerCase() === "body") {
          initBody(node);
          break;
        }
      }
    });
  }).observe(document.querySelector("html"), { childList: true });

  /**
   * Advance the colour scheme to its next state.
   *
   * With prefers-color-scheme support the order is
   *   "default" -> follow system -> "dracula" -> "default" -> ...
   * and without it the scheme simply alternates between "dracula" and
   * "default". Only the "follow the system" flag is written to localStorage
   * here; the scheme itself is only written to the body attribute.
   */
  window.toggleScheme = function () {
    var body = document.querySelector("body");
    var canFollowSystem = supportsSystemScheme();
    var scheme = body.getAttribute(SCHEME_KEY);
    var prefers = body.getAttribute(PREFERS_KEY);

    if (canFollowSystem && scheme === "default" && prefers !== "true") {
      prefers = "true";
      scheme = systemScheme();
    } else if (canFollowSystem && prefers === "true") {
      prefers = "false";
      scheme = "dracula";
    } else if (scheme === "dracula") {
      prefers = "false";
      scheme = "default";
    } else {
      prefers = "false";
      scheme = "dracula";
    }

    localStorage.setItem(PREFERS_KEY, prefers);
    body.setAttribute(PREFERS_KEY, prefers);
    body.setAttribute(SCHEME_KEY, scheme);
  };
})();
//# sourceMappingURL=material-extra-theme-TVq-kNRT.js.map


================================================
FILE: docs/theme/main.html
================================================
{% extends "base.html" %}
{# Theme override: keep the parent's `libs` block and append the extra script
   tags from partials/libs.html (skipped silently if that partial is missing). #}

{% block libs %}
{{ super() }}
{% include "partials/libs.html" ignore missing %}
{% endblock %}


================================================
FILE: docs/theme/partials/footer.html
================================================

{#-
  Footer override: renders only the previous/next page navigation links.
  The whole <nav> is omitted when the page has neither a previous nor a next page.
  Labels come from the `lang.t(...)` translation helper imported below.
-#}
{% import "partials/language.html" as lang with context %}
<footer class="md-footer">
  {% if page.previous_page or page.next_page %}
    <nav
      class="md-footer__inner md-grid"
      aria-label="{{ lang.t('footer.title') }}"
    >
      {# Link to the previous page: arrow icon first, then direction label and title #}
      {% if page.previous_page %}
        <a
          href="{{ page.previous_page.url | url }}"
          class="md-footer__link md-footer__link--prev"
          rel="prev"
        >
          <div class="md-footer__button md-icon">
            {% include ".icons/material/arrow-left.svg" %}
          </div>
          <div class="md-footer__title">
            <div class="md-ellipsis">
              <span class="md-footer__direction">
                {{ lang.t("footer.previous") }}
              </span>
              {{ page.previous_page.title }}
            </div>
          </div>
        </a>
      {% endif %}
      {# Link to the next page: direction label and title first, then arrow icon #}
      {% if page.next_page %}
        <a
          href="{{ page.next_page.url | url }}"
          class="md-footer__link md-footer__link--next"
          rel="next"
        >
          <div class="md-footer__title">
            <div class="md-ellipsis">
              <span class="md-footer__direction">
                {{ lang.t("footer.next") }}
              </span>
              {{ page.next_page.title }}
            </div>
          </div>
          <div class="md-footer__button md-icon">
            {% include ".icons/material/arrow-right.svg" %}
          </div>
        </a>
      {% endif %}
    </nav>
  {% endif %}
</footer>


================================================
FILE: docs/theme/partials/header.html
================================================

{#-
  Header override: logo link, drawer toggle, site/page title, color-scheme
  toggle links, optional search and optional repository source.
  `site_url` falls back to the nav homepage URL when `config.site_url` is unset;
  when directory URLs are disabled and the resolved URL both starts and ends
  with ".", "/index.html" is appended.
-#}
{% set site_url = config.site_url | d(nav.homepage.url, true) | url %}
{% if not config.use_directory_urls and site_url[0] == site_url[-1] == "." %}
  {% set site_url = site_url ~ "/index.html" %}
{% endif %}
<header class="md-header" data-md-component="header">
  <nav
    class="md-header__inner md-grid"
    aria-label="{{ lang.t('header.title') }}"
  >
    <a
      href="{{ site_url }}"
      title="{{ config.site_name | e }}"
      class="md-header__button md-logo"
      aria-label="{{ config.site_name }}"
    >
      {% include "partials/logo.html" %}
    </a>
    <label class="md-header__button md-icon" for="__drawer">
      {% include ".icons/material/menu" ~ ".svg" %}
    </label>
    <div class="md-header__title" data-md-component="header-title">
      <div class="md-header__ellipsis">
        <div class="md-header__topic">
          <span class="md-ellipsis">
            {{ config.site_name }}
          </span>
        </div>
        <div class="md-header__topic" data-md-component="header-topic">
          <span class="md-ellipsis">
            {# Prefer the title from the page metadata over the derived page title #}
            {% if page and page.meta and page.meta.title %}
              {{ page.meta.title }}
            {% else %}
              {{ page.title }}
            {% endif %}
          </span>
        </div>
      </div>
    </div>
    <div class="md-header__options">
      {# All three links call window.toggleScheme(), which is defined by
         assets/pymdownx-extras/material-extra-theme-TVq-kNRT.js. Which link is
         visible is presumably decided by CSS from the body's color-scheme
         attributes -- TODO confirm against the theme stylesheet. #}
      <div class="md-header-nav__scheme md-header-nav__button md-source__icon md-icon">
          <a
            href="javascript:toggleScheme();"
            title="Light mode"
            class="light-mode"
          >
          {% set icon = "material/weather-sunny" %}
          {% include ".icons/" ~ icon ~ ".svg" %}
          </a>
          <a
            href="javascript:toggleScheme();"
            title="Dark mode"
            class="dark-mode"
          >
          {% set icon = "material/weather-night" %}
          {% include ".icons/" ~ icon ~ ".svg" %}
          </a>
          <a
            href="javascript:toggleScheme();"
            title="System preference"
            class="system-mode"
          >
          {% set icon = "material/theme-light-dark" %}
          {% include ".icons/" ~ icon ~ ".svg" %}
          </a>
          {# NOTE: an HTML comment does not disable Jinja tags, so the set/include
             below are still evaluated and the SVG is emitted inside the comment. #}
          <!-- <a
            href="javascript:toggleScheme();"
            title="Unknown scheme"
            class="unknown-mode"
          >
          {% set icon = "material/help-circle" %}
          {% include ".icons/" ~ icon ~ ".svg" %}
          </a> -->
      </div>
    </div>
    {# Search button and dialog are rendered only when the search plugin is enabled #}
    {% if "material/search" in config.plugins %}
      <label class="md-header__button md-icon" for="__search">
        {% include ".icons/material/magnify.svg" %}
      </label>
      {% include "partials/search.html" %}
    {% endif %}
    {# Repository link is rendered only when `repo_url` is configured #}
    {% if config.repo_url %}
      <div class="md-header__source">
        {% include "partials/source.html" %}
      </div>
    {% endif %}
  </nav>
</header>


================================================
FILE: docs/theme/partials/libs.html
================================================
{#- Extra scripts appended to the `libs` block by main.html. The theme script is
    loaded first; it defines window.toggleScheme() used by partials/header.html. -#}
<script src="{{ 'assets/pymdownx-extras/material-extra-theme-TVq-kNRT.js' | url }}" type="text/javascript"></script>
<script src="{{ 'assets/pymdownx-extras/material-extra-3rdparty-E-i8w1WA.js' | url }}" type="text/javascript"></script>


================================================
FILE: docs/usage.md
================================================
## 1. Add your AI models

![resources tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/resources-tab.png)

- The tool uses Large Language Models (LLMs) to perform various tasks in a QA pipeline.
  So, you need to provide the application with access to the LLMs you want
  to use.
- You need to provide at least one. However, it is recommended that you include all the LLMs
  that you have access to, so that you can switch between them while using the
  application.

To add a model:

1. Navigate to the `Resources` tab.
2. Select the `LLMs` sub-tab.
3. Select the `Add` sub-tab.
4. Configure the model to add:
   - Give it a name.
   - Pick a vendor/provider (e.g. `ChatOpenAI`).
   - Provide the specifications.
   - (Optional) Set the model as default.
5. Click `Add` to add the model.
6. Select the `Embedding Models` sub-tab and repeat steps 3 to 5 to add an embedding model.

<details markdown>

<summary>(Optional) Configure model via the .env file</summary>

Alternatively, you can configure the models via the `.env` file with the information needed to connect to the LLMs. This file is located in
the folder of the application. If you don't see it, you can create one.

Currently, the following providers are supported:

### OpenAI

In the `.env` file, set the `OPENAI_API_KEY` variable with your OpenAI API key in order
to enable access to OpenAI's models. There are other variables that can be modified,
please feel free to edit them to fit your case. Otherwise, the default parameter should
work for most people.

```shell
OPENAI_API_BASE=https://api.openai.com/v1
OPENAI_API_KEY=<your OpenAI API key here>
OPENAI_CHAT_MODEL=gpt-3.5-turbo
OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
```

### Azure OpenAI

For OpenAI models via the Azure platform, you need to provide your Azure endpoint and API
key. You might also need to provide your deployment names for the chat model and the
embedding model, depending on how you set up your Azure deployments.

```shell
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_API_KEY=
OPENAI_API_VERSION=2024-02-15-preview # could be different for you
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo # change to your deployment name
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002 # change to your deployment name
```

### Local models

Pros:

- Privacy. Your documents will be stored and processed locally.
- Choices. There are a wide range of LLMs in terms of size, domain, language to choose
  from.
- Cost. It's free.

Cons:

- Quality. Local models are much smaller and thus have lower generative quality than
  paid APIs.
- Speed. Local models are deployed using your machine so the processing speed is
  limited by your hardware.

#### Find and download a LLM

You can search for and download an LLM to be run locally from the [Hugging Face
Hub](https://huggingface.co/models). Currently, these model formats are supported:

- GGUF

You should choose a model whose size is smaller than your device's available memory,
leaving about 2 GB free. For example, if you have 16 GB of RAM in total, of which 12 GB
is available, then you should choose a model that takes up at most 10 GB of RAM. Bigger
models tend to give better generation but also take more processing time.

Here are some recommendations and their size in memory:

- [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true):
  around 2 GB

#### Enable local models

To add a local model to the model pool, set the `LOCAL_MODEL` variable in the `.env`
file to the path of the model file.

```shell
LOCAL_MODEL=<full path to your model file>
```

Here is how to get the full path of your model file:

- On Windows 11: right click the file and select `Copy as Path`.
</details>

## 2. Upload your documents

![file index tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/file-index-tab.png)

In order to do QA on your documents, you need to upload them to the application first.
Navigate to the `File Index` tab and you will see 2 sections:

1. File upload:
   - Drag and drop your file to the UI or select it from your file system.
     Then click `Upload and Index`.
   - The application will take some time to process the file and show a message once it is done.
2. File list:
   - This section shows the list of files that have been uploaded to the application and allows users to delete them.

## 3. Chat with your documents

![chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)

Now navigate back to the `Chat` tab. The chat tab is divided into 3 regions:

1. Conversation Settings Panel
   - Here you can select, create, rename, and delete conversations.
     - By default, a new conversation is created automatically if no conversation is selected.
   - Below that you have the file index, where you can choose whether to disable, select all files, or select which files to retrieve references from.
     - If you choose "Disabled", no files will be considered as context during chat.
     - If you choose "Search All", all files will be considered during chat.
     - If you choose "Select", a dropdown will appear for you to select the
       files to be considered during chat. If no files are selected, then no
       files will be considered during chat.
2. Chat Panel
   - This is where you can chat with the chatbot.
3. Information Panel

![information panel](https://raw.githubusercontent.com/Cinnamon/kotaemon/develop/docs/images/info-panel-scores.png)

- Supporting information such as the retrieved evidence and reference will be
  displayed here.
- Direct citation for the answer produced by the LLM is highlighted.
- The confidence score of the answer and relevant scores of evidences are displayed to quickly assess the quality of the answer and retrieved content.

- Meaning of the score displayed:
  - **Answer confidence**: answer confidence level from the LLM model.
  - **Relevance score**: overall relevant score between evidence and user question.
  - **Vectorstore score**: relevant score from vector embedding similarity calculation (show `full-text search` if retrieved from full-text search DB).
  - **LLM relevant score**: relevant score from LLM model (which judges relevancy between question and evidence using a specific prompt).
  - **Reranking score**: relevant score from Cohere [reranking model](https://cohere.com/rerank).

Generally, the score quality is `LLM relevant score` > `Reranking score` > `Vectorstore score`.
By default, overall relevance score is taken directly from LLM relevant score. Evidences are sorted based on their overall relevance score and whether they have citation or not.


================================================
FILE: flowsettings.py
================================================
import os
from importlib.metadata import version
from inspect import currentframe, getframeinfo
from pathlib import Path

from decouple import config
from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP
from theflow.settings.default import *  # noqa

# Locate this settings file through the current frame's filename; `this_dir` is
# the anchor for the app-data and docs paths defined below.
# NOTE(review): presumably done via `inspect` rather than `__file__` because of
# how the settings loader executes this file -- confirm.
cur_frame = currentframe()
if cur_frame is None:
    raise ValueError("Cannot get the current frame.")
this_file = getframeinfo(cur_frame).filename
this_dir = Path(this_file).parent

# change this if your app uses a different name
KH_PACKAGE_NAME = "kotaemon_app"

# Application version: an explicit KH_APP_VERSION setting wins; otherwise use the
# installed distribution's metadata, and fall back to "local" if that lookup fails
# for any reason (e.g. the package is not installed).
KH_APP_VERSION = config("KH_APP_VERSION", None)
if not KH_APP_VERSION:
    try:
        # Caution: This might produce the wrong version
        # https://stackoverflow.com/a/59533071
        KH_APP_VERSION = version(KH_PACKAGE_NAME)
    except Exception:
        KH_APP_VERSION = "local"

# Runtime switches read through `decouple.config`, each with its default.
KH_GRADIO_SHARE = config("KH_GRADIO_SHARE", default=False, cast=bool)
KH_ENABLE_FIRST_SETUP = config("KH_ENABLE_FIRST_SETUP", default=True, cast=bool)
KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
# Base URL of the Ollama server's OpenAI-compatible API (note the trailing "v1/");
# used as `base_url` for the "ollama" model entries further down.
KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")

# The app can be run from anywhere and it's not trivial to decide where to store
# app data. So let's use the same directory as the flowsettings.py file.
KH_APP_DATA_DIR = this_dir / "ktem_app_data"
# Recorded *before* mkdir, so it tells whether the data dir pre-dated this run.
KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()
KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)

# User data directory
KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data"
KH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True)

# markdown output directory
KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir"
KH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# chunks output directory
KH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / "chunks_cache_dir"
KH_CHUNKS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# zip output directory
KH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir"
KH_ZIP_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# zip input directory
KH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir_in"
KH_ZIP_INPUT_DIR.mkdir(parents=True, exist_ok=True)

# HF models can be big, let's store them in the app data directory so that it's easier
# for users to manage their storage.
# ref: https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache
# Both variables are overwritten unconditionally and point at the same folder, so
# any value already present in the process environment is replaced.
os.environ["HF_HOME"] = str(KH_APP_DATA_DIR / "huggingface")
os.environ["HF_HUB_CACHE"] = str(KH_APP_DATA_DIR / "huggingface")

# doc directory
KH_DOC_DIR = this_dir / "docs"

KH_MODE = "dev"
KH_SSO_ENABLED = config("KH_SSO_ENABLED", default=False, cast=bool)

# Feature toggles (overridable through the environment / .env file)
KH_FEATURE_CHAT_SUGGESTION = config(
    "KH_FEATURE_CHAT_SUGGESTION", default=False, cast=bool
)
KH_FEATURE_USER_MANAGEMENT = config(
    "KH_FEATURE_USER_MANAGEMENT", default=True, cast=bool
)
KH_USER_CAN_SEE_PUBLIC = None
# Admin credentials for user management; both default to "admin".
# NOTE(review): override these defaults for any non-local deployment.
KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
    config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
)
KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
    config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin")
)
KH_ENABLE_ALEMBIC = False
# SQLite database and uploaded-file storage both live under the user data dir
KH_DATABASE = f"sqlite:///{KH_USER_DATA_DIR / 'sql.db'}"
KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files")
# Dotted path of the web-search class; swap in the commented line to use the
# alternative backend.
KH_WEB_SEARCH_BACKEND = (
    "kotaemon.indices.retrievers.tavily_web_search.WebSearch"
    # "kotaemon.indices.retrievers.jina_web_search.WebSearch"
)

# Document store spec: "__type__" is the dotted path of the store class and "path"
# its on-disk location. The commented "__type__" lines are alternative backends.
KH_DOCSTORE = {
    # "__type__": "kotaemon.storages.ElasticsearchDocumentStore",
    # "__type__": "kotaemon.storages.SimpleFileDocumentStore",
    "__type__": "kotaemon.storages.LanceDBDocumentStore",
    "path": str(KH_USER_DATA_DIR / "docstore"),
}
# Vector store spec, same layout as KH_DOCSTORE.
KH_VECTORSTORE = {
    # "__type__": "kotaemon.storages.LanceDBVectorStore",
    "__type__": "kotaemon.storages.ChromaVectorStore",
    # "__type__": "kotaemon.storages.MilvusVectorStore",
    # "__type__": "kotaemon.storages.QdrantVectorStore",
    "path": str(KH_USER_DATA_DIR / "vectorstore"),
}
# Model registries, filled in below. Each entry maps a name to
# {"spec": {...constructor arguments incl. "__type__"...}, "default": bool}.
KH_LLMS = {}
KH_EMBEDDINGS = {}
KH_RERANKINGS = {}

# populate options from config
# Azure OpenAI entries are registered only when both the API key and the endpoint
# are configured; the chat and embedding entries additionally require their own
# deployment name. OPENAI_API_VERSION falls back to "2024-02-15-preview" when
# unset or empty.
if config("AZURE_OPENAI_API_KEY", default="") and config(
    "AZURE_OPENAI_ENDPOINT", default=""
):
    if config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""):
        KH_LLMS["azure"] = {
            "spec": {
                "__type__": "kotaemon.llms.AzureChatOpenAI",
                "temperature": 0,
                "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
                "api_key": config("AZURE_OPENAI_API_KEY", default=""),
                "api_version": config("OPENAI_API_VERSION", default="")
                or "2024-02-15-preview",
                "azure_deployment": config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""),
                "timeout": 20,
            },
            "default": False,
        }
    if config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""):
        KH_EMBEDDINGS["azure"] = {
            "spec": {
                "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
                "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
                "api_key": config("AZURE_OPENAI_API_KEY", default=""),
                "api_version": config("OPENAI_API_VERSION", default="")
                or "2024-02-15-preview",
                "azure_deployment": config(
                    "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""
                ),
                "timeout": 10,
            },
            "default": False,
        }

# Placeholder used when no OpenAI key is configured. Because it is non-empty, the
# "openai" entries below are still registered, just not marked as default.
OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT)
GOOGLE_API_KEY = config("GOOGLE_API_KEY", default="your-key")
# True only when a real (non-empty, non-placeholder) OpenAI key was supplied.
# Also consulted further down: the Google entries become the default otherwise.
IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT

if OPENAI_API_KEY:
    KH_LLMS["openai"] = {
        "spec": {
            "__type__": "kotaemon.llms.ChatOpenAI",
            "temperature": 0,
            # `or` (rather than `default=`) so that an empty OPENAI_API_BASE
            # also falls back to the public endpoint.
            "base_url": config("OPENAI_API_BASE", default="")
            or "https://api.openai.com/v1",
            "api_key": OPENAI_API_KEY,
            "model": config("OPENAI_CHAT_MODEL", default="gpt-4o-mini"),
            "timeout": 20,
        },
        "default": IS_OPENAI_DEFAULT,
    }
    KH_EMBEDDINGS["openai"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
            # Same fallback as the chat model above: previously an empty
            # OPENAI_API_BASE produced an empty base_url for embeddings only.
            "base_url": config("OPENAI_API_BASE", default="")
            or "https://api.openai.com/v1",
            "api_key": OPENAI_API_KEY,
            "model": config(
                "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-3-large"
            ),
            "timeout": 10,
            "context_length": 8191,
        },
        "default": IS_OPENAI_DEFAULT,
    }

# Voyage AI embedding and reranking entries are registered only when
# VOYAGE_API_KEY is set; neither is marked as default.
VOYAGE_API_KEY = config("VOYAGE_API_KEY", default="")
if VOYAGE_API_KEY:
    KH_EMBEDDINGS["voyageai"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.VoyageAIEmbeddings",
            "api_key": VOYAGE_API_KEY,
            "model": config("VOYAGE_EMBEDDINGS_MODEL", default="voyage-3-large"),
        },
        "default": False,
    }
    KH_RERANKINGS["voyageai"] = {
        "spec": {
            "__type__": "kotaemon.rerankings.VoyageAIReranking",
            "model_name": "rerank-2",
            "api_key": VOYAGE_API_KEY,
        },
        "default": False,
    }

# Local (Ollama) models: registered only when LOCAL_MODEL is set to a non-empty
# value, so the `default=` passed to the inner LOCAL_MODEL lookups is never used.
if config("LOCAL_MODEL", default=""):
    # Chat model served through Ollama's OpenAI-compatible API
    KH_LLMS["ollama"] = {
        "spec": {
            "__type__": "kotaemon.llms.ChatOpenAI",
            "base_url": KH_OLLAMA_URL,
            "model": config("LOCAL_MODEL", default="qwen2.5:7b"),
            "api_key": "ollama",
        },
        "default": False,
    }
    KH_LLMS["ollama-long-context"] = {
        "spec": {
            "__type__": "kotaemon.llms.LCOllamaChat",
            # This entry uses the server root instead of the ".../v1/" URL.
            # Strip only a trailing "/v1" path segment: a blanket
            # `.replace("v1/", "")` also mangles hosts such as
            # "http://srv1/v1/" and misses URLs written without the final "/".
            "base_url": KH_OLLAMA_URL.rstrip("/").removesuffix("/v1") + "/",
            "model": config("LOCAL_MODEL", default="qwen2.5:7b"),
            "num_ctx": 8192,
        },
        "default": False,
    }

    # Embeddings served by the same Ollama endpoint
    KH_EMBEDDINGS["ollama"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
            "base_url": KH_OLLAMA_URL,
            "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
            "api_key": "ollama",
        },
        "default": False,
    }
    KH_EMBEDDINGS["fast_embed"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
            "model_name": "BAAI/bge-base-en-v1.5",
        },
        "default": False,
    }

# additional LLM configurations
# These entries are always registered. "claude" and "groq" carry the literal
# placeholder key "your-key"; "cohere" and "mistral" read their key from the
# environment and fall back to the same placeholder.
KH_LLMS["claude"] = {
    "spec": {
        "__type__": "kotaemon.llms.chats.LCAnthropicChat",
        "model_name": "claude-3-5-sonnet-20240620",
        "api_key": "your-key",
    },
    "default": False,
}
# Google becomes the default LLM whenever no real OpenAI key is configured
KH_LLMS["google"] = {
    "spec": {
        "__type__": "kotaemon.llms.chats.LCGeminiChat",
        "model_name": "gemini-1.5-flash",
        "api_key": GOOGLE_API_KEY,
    },
    "default": not IS_OPENAI_DEFAULT,
}
KH_LLMS["groq"] = {
    "spec": {
        "__type__": "kotaemon.llms.ChatOpenAI",
        "base_url": "https://api.groq.com/openai/v1",
        "model": "llama-3.1-8b-instant",
        "api_key": "your-key",
    },
    "default": False,
}
KH_LLMS["cohere"] = {
    "spec": {
        "__type__": "kotaemon.llms.chats.LCCohereChat",
        "model_name": "command-r-plus-08-2024",
        "api_key": config("COHERE_API_KEY", default="your-key"),
    },
    "default": False,
}
KH_LLMS["mistral"] = {
    "spec": {
        "__type__": "kotaemon.llms.ChatOpenAI",
        "base_url": "https://api.mistral.ai/v1",
        "model": "ministral-8b-latest",
        "api_key": config("MISTRAL_API_KEY", default="your-key"),
    },
    "default": False,
}

# additional embeddings configurations
# Always registered; keys fall back to the "your-key" placeholder when unset.
KH_EMBEDDINGS["cohere"] = {
    "spec": {
        "__type__": "kotaemon.embeddings.LCCohereEmbeddings",
        "model": "embed-multilingual-v3.0",
        "cohere_api_key": config("COHERE_API_KEY", default="your-key"),
        "user_agent": "default",
    },
    "default": False,
}
# Mirrors the Google LLM entry: default whenever no real OpenAI key is configured
KH_EMBEDDINGS["google"] = {
    "spec": {
        "__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
        "model": "models/text-embedding-004",
        "google_api_key": GOOGLE_API_KEY,
    },
    "default": not IS_OPENAI_DEFAULT,
}
KH_EMBEDDINGS["mistral"] = {
    "spec": {
        "__type__": "kotaemon.embeddings.LCMistralEmbeddings",
        "model": "mistral-embed",
        "api_key": config("MISTRAL_API_KEY", default="your-key"),
    },
    "default": False,
}
# Disabled example: a local HuggingFace sentence-transformers embedding entry.
# KH_EMBEDDINGS["huggingface"] = {
#     "spec": {
#         "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
#         "model_name": "sentence-transformers/all-mpnet-base-v2",
#     },
#     "default": False,
# }

# default reranking models
# Cohere is registered unconditionally and marked as the default reranker; the
# key defaults to an empty string here (not the "your-key" placeholder used by the
# other Cohere entries).
KH_RERANKINGS["cohere"] = {
    "spec": {
        "__type__": "kotaemon.rerankings.CohereReranking",
        "model_name": "rerank-multilingual-v2.0",
        "cohere_api_key": config("COHERE_API_KEY", default=""),
    },
    "default": True,
}

# Dotted paths of the reasoning pipelines made available to the app
KH_REASONINGS = [
    "ktem.reasoning.simple.FullQAPipeline",
    "ktem.reasoning.simple.FullDecomposeQAPipeline",
    "ktem.reasoning.react.ReactAgentPipeline",
    "ktem.reasoning.rewoo.RewooAgentPipeline",
]
KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool)
# Azure OpenAI chat-completions URL for the vision deployment. The API version
# uses the same "2024-02-15-preview" fallback as the Azure model entries in this
# file, so the URL never ends in an empty `api-version=` parameter.
KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
    config("AZURE_OPENAI_ENDPOINT", default=""),
    config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
    config("OPENAI_API_VERSION", default="") or "2024-02-15-preview",
)


# App-level settings; intentionally empty in this file.
SETTINGS_APP: dict[str, dict] = {}


# Reasoning settings. Each entry describes one option: display name, initial
# value, optional choices and the UI component type.
SETTINGS_REASONING = {
    # "choices" is left empty and "value" unset here.
    # NOTE(review): presumably populated at runtime from KH_REASONINGS -- confirm.
    "use": {
        "name": "Reasoning options",
        "value": None,
        "choices": [],
        "component": "radio",
    },
    "lang": {
        "name": "Language",
        "value": "en",
        # (label, value) pairs: the language name is shown, the code is stored
        "choices": [(lang, code) for code, lang in SUPPORTED_LANGUAGE_MAP.items()],
        "component": "dropdown",
    },
    "max_context_length": {
        "name": "Max context length (LLM)",
        "value": 32000,
        "component": "number",
    },
}

# GraphRAG feature switches. USE_GLOBAL_GRAPHRAG is only defined here; it is not
# referenced again in this file.
USE_GLOBAL_GRAPHRAG = config("USE_GLOBAL_GRAPHRAG", default=True, cast=bool)
USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
USE_LIGHTRAG = config("USE_LIGHTRAG", default=True, cast=bool)
USE_MS_GRAPHRAG = config("USE_MS_GRAPHRAG", default=True, cast=bool)

# Dotted paths of the enabled graph index classes, in a fixed order
GRAPHRAG_INDEX_TYPES = []

if USE_MS_GRAPHRAG:
    GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.GraphRAGIndex")
if USE_NANO_GRAPHRAG:
    GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.NanoGraphRAGIndex")
if USE_LIGHTRAG:
    GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.LightRAGIndex")

# All index types: the plain file index first, then the enabled graph indices
KH_INDEX_TYPES = [
    "ktem.index.file.FileIndex",
    *GRAPHRAG_INDEX_TYPES,
]

# One index definition per enabled graph index. The display name is derived from
# the class name, e.g. "...graph.GraphRAGIndex" -> "GraphRAG Collection".
GRAPHRAG_INDICES = [
    {
        "name": graph_type.split(".")[-1].replace("Index", "")
        + " Collection",  # get last name
        "config": {
            "supported_file_types": (
                ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
            ),
            "private": True,
        },
        "index_type": graph_type,
    }
    for graph_type in GRAPHRAG_INDEX_TYPES
]

# Index definitions: the default "File Collection" followed by the graph
# collections. Both use the same supported file types and are private.
KH_INDICES = [
    {
        "name": "File Collection",
        "config": {
            "supported_file_types": (
                ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
            ),
            "private": True,
        },
        "index_type": "ktem.index.file.FileIndex",
    },
    *GRAPHRAG_INDICES,
]


================================================
FILE: fly.toml
================================================
# fly.toml app configuration file generated for kotaemon on 2024-12-24T20:56:32+07:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = 'kotaemon'
primary_region = 'sin'

# No build options are set.
[build]

# Persistent volume for application data. flowsettings.py keeps its data in a
# `ktem_app_data` folder next to itself; this assumes the app lives in /app.
[mounts]
  destination = "/app/ktem_app_data"
  source = "ktem_volume"

[http_service]
  # Matches the default GRADIO_SERVER_PORT exported by launch.sh
  internal_port = 7860
  force_https = true
  # With min_machines_running = 0, every machine may be suspended when idle and
  # started again on demand (see the Fly.io configuration reference above).
  auto_stop_machines = 'suspend'
  auto_start_machines = true
  min_machines_running = 0
  processes = ['app']

[[vm]]
  memory = '4gb'
  cpu_kind = 'shared'
  cpus = 4


================================================
FILE: launch.sh
================================================
#!/bin/bash
# Entry point: picks the launch mode from KH_DEMO_MODE / KH_SSO_ENABLED.

# Fall back to the default bind address and port when unset or empty.
export GRADIO_SERVER_NAME="${GRADIO_SERVER_NAME:-0.0.0.0}"
export GRADIO_SERVER_PORT="${GRADIO_SERVER_PORT:-7860}"

if [ "$KH_DEMO_MODE" = "true" ]; then
    # Demo mode takes precedence over SSO: no user management, no LightRAG.
    echo "KH_DEMO_MODE is true. Launching in demo mode..."
    GR_FILE_ROOT_PATH="/app" KH_FEATURE_USER_MANAGEMENT=false USE_LIGHTRAG=false .venv/bin/uvicorn sso_app_demo:app --host "$GRADIO_SERVER_NAME" --port "$GRADIO_SERVER_PORT"
elif [ "$KH_SSO_ENABLED" = "true" ]; then
    echo "KH_SSO_ENABLED is true. Launching in SSO mode..."
    GR_FILE_ROOT_PATH="/app" KH_SSO_ENABLED=true .venv/bin/uvicorn sso_app:app --host "$GRADIO_SERVER_NAME" --port "$GRADIO_SERVER_PORT"
else
    # Default mode: start Ollama in the background, then the app itself.
    ollama serve &
    .venv/bin/python app.py
fi


================================================
FILE: libs/kotaemon/README.md
================================================
# kotaemon

Quick and easy AI components to build Kotaemon

## Documentation

TBD

## Install

```shell
pip install kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git
```

## Contribute

### Setup

- Create conda environment (suggest 3.10)

  ```shell
  conda create -n kotaemon python=3.10
  conda activate kotaemon
  ```

- Clone the repo

  ```shell
  git clone git@github.com:Cinnamon/kotaemon.git
  cd kotaemon
  ```

- Install all

  ```shell
  pip install -e ".[dev]"
  ```

- Pre-commit

  ```shell
  pre-commit install
  ```

- Test

  ```shell
  pytest tests
  ```

### Credential sharing

This repo uses [git-secret](https://sobolevn.me/git-secret/) to share credentials, which
internally uses `gpg` to encrypt and decrypt secret files.

This repo uses `python-dotenv` to manage credentials stored as environment variables.
Please note that the use of `python-dotenv` and credentials are for development
purposes only. Thus, it should not be used in the main source code (i.e. `kotaemon/` and `tests/`), but can be used in `examples/`.

#### Install git-secret

Please follow the [official guide](https://sobolevn.me/git-secret/installation) to install git-secret.

For Windows users, see [For Windows users](#for-windows-users).

For users who don't have sudo privilege to install packages, follow the `Manual Installation` in the [official guide](https://sobolevn.me/git-secret/installation) and set `PREFIX` to a path that you have access to. And please don't forget to add `PREFIX` to your `PATH`.

#### Gaining access

In order to gain access to the secret files, you must provide your gpg public key file to anyone who has access and ask them to add your key to the keyring. For a quick tutorial on generating your gpg key pair, you can refer to the `Using gpg` section from the [git-secret main page](https://sobolevn.me/git-secret/).

#### Decrypt the secret file

The credentials are encrypted in the `.env.secret` file. To print the decrypted content to stdout, run

```shell
git-secret cat [filename]
```

Or to get the decrypted `.env` file, run

```shell
git-secret reveal [filename]
```

#### For Windows users

git-secret is currently not available for Windows, thus the easiest way is to use it in WSL (please use the latest version of WSL2). From there you have 2 options:

1. Using the gpg of WSL.

   This is the most straight-forward option since you would use WSL just like any other unix environment. However, the downside is that you have to make WSL your main environment, which means WSL must have write permission on your repo. To achieve this, you must either:

   - Clone and store your repo inside WSL's file system.
   - Provide WSL with the necessary permissions on your Windows file system. This can be achieved by setting `automount` options for WSL. To do that, add the following content to `/etc/wsl.conf` and then restart your sub-system.

     ```shell
     [automount]
     options = "metadata,umask=022,fmask=011"
     ```

     This enables all permissions for user owner.

2. Using the gpg of Windows but with git-secret from WSL.

   For those who use Windows as the main environment, having to switch back and forth between Windows and WSL will be inconvenient. You can instead stay within your Windows environment and apply some tricks to use `git-secret` from WSL.

   - Install and setup `gpg` on Windows.
   - Install `git-secret` on WSL. Now in Windows, you can invoke `git-secret` using `wsl git-secret`.
   - Alternatively you can setup alias in CMD to shorten the syntax. Please refer to [this SO answer](https://stackoverflow.com/a/65823225) for the instruction. Some recommended aliases are:

     ```bat
     @echo off

     :: Commands
     DOSKEY ls=dir /B $*
     DOSKEY ll=dir /a $*
     DOSKEY git-secret=wsl git-secret $*
     DOSKEY gs=wsl git-secret $*
     ```

     Now you can invoke `git-secret` in CMD using `git-secret` or `gs`.

     - For PowerShell users, similar behaviours can be achieved using `Set-Alias` and `profile.ps1`. Please refer to this [SO thread](https://stackoverflow.com/questions/61081434/how-do-i-create-a-permanent-alias-file-in-powershell-core) as an example.

### Code base structure

- documents: define document
- loaders


================================================
FILE: libs/kotaemon/kotaemon/__init__.py
================================================
# Disable third-party telemetry by monkey patching the libraries that send it.
# Both libraries are optional: when one is not installed, its patch is skipped.
import logging
import os

logger = logging.getLogger(__name__)

# posthog: replace `posthog.capture` with a stub that only logs the call.
try:
    import posthog
except ImportError:
    pass
else:

    def capture(*args, **kwargs):
        logger.info("posthog.capture called with args: %s, kwargs: %s", args, kwargs)

    posthog.capture = capture

# haystack: the opt-out variable is set before the import is attempted, so it is
# in place whether or not haystack is installed; then the telemetry object is
# cleared.
os.environ["HAYSTACK_TELEMETRY_ENABLED"] = "False"
try:
    import haystack.telemetry
except ImportError:
    pass
else:
    haystack.telemetry.telemetry = None


================================================
FILE: libs/kotaemon/kotaemon/agents/__init__.py
================================================
from .base import BaseAgent
from .io import AgentFinish, AgentOutput, AgentType, BaseScratchPad
from .langchain_based import LangchainAgent
from .react.agent import ReactAgent
from .rewoo.agent import RewooAgent
from .tools import (
    BaseTool,
    ComponentTool,
    GoogleSearchTool,
    LLMTool,
    MCPTool,
    WikipediaTool,
)

__all__ = [
    # agent
    "BaseAgent",
    "ReactAgent",
    "RewooAgent",
    "LangchainAgent",
    # tool
    "BaseTool",
    "ComponentTool",
    "GoogleSearchTool",
    "WikipediaTool",
    "LLMTool",
    "MCPTool",
    # io
    "AgentType",
    "AgentOutput",
    "AgentFinish",
    "BaseScratchPad",
]


================================================
FILE: libs/kotaemon/kotaemon/agents/base.py
================================================
from typing import Optional, Union

from kotaemon.base import BaseComponent, Node, Param
from kotaemon.llms import BaseLLM, PromptTemplate

from .io import AgentOutput, AgentType
from .tools import BaseTool


class BaseAgent(BaseComponent):
    """Define base agent interface.

    Declares the parameters shared by agent implementations (name, type,
    description, optional LLM, prompt template(s) and tool plugins) and the
    `run` entry point that subclasses must implement.
    """

    name: str = Param(help="Name of the agent.")
    agent_type: AgentType = Param(help="Agent type, must be one of AgentType")
    description: str = Param(
        help=(
            "Description used to tell the model how/when/why to use the agent. You can"
            " provide few-shot examples as a part of the description. This will be"
            " input to the prompt of LLM."
        )
    )
    llm: Optional[BaseLLM] = Node(
        help=(
            "LLM to be used for the agent (optional). LLM must implement BaseLLM"
            " interface."
        )
    )
    prompt_template: Optional[Union[PromptTemplate, dict[str, PromptTemplate]]] = Param(
        help="A prompt template or a dict to supply different prompt to the agent"
    )
    plugins: list[BaseTool] = Param(
        default_callback=lambda _: [],
        help="List of plugins / tools to be used in the agent",
    )

    @staticmethod
    def safeguard_run(run_func, *args, **kwargs):
        """Decorate a `run`-style method so that exceptions become failed outputs.

        Args:
            run_func: The method to wrap. It is called as
                `run_func(self, *args, **kwargs)`.
            *args, **kwargs: Accepted for backward compatibility; not used by
                the decorator itself.

        Returns:
            A wrapper that returns whatever `run_func` returns, or, if it raises
            any `Exception`, an `AgentOutput` with empty text,
            `status="failed"` and `error` set to the exception message.
        """
        # Local import keeps this block self-contained (no new module import).
        from functools import wraps

        # Preserve the wrapped method's name, docstring and signature metadata.
        @wraps(run_func)
        def wrapper(self, *args, **kwargs):
            try:
                return run_func(self, *args, **kwargs)
            except Exception as e:
                # Deliberate catch-all: report the failure through the output
                # object instead of propagating the exception to the caller.
                return AgentOutput(
                    text="",
                    agent_type=self.agent_type,
                    status="failed",
                    error=str(e),
                )

        return wrapper

    def add_tools(self, tools: list[BaseTool]) -> None:
        """Helper method to add tools and update agent state if needed.

        Args:
            tools: Tools appended (in place) to `self.plugins`.
        """
        self.plugins.extend(tools)

    def run(self, *args, **kwargs) -> AgentOutput | list[AgentOutput]:
        """Run the component.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError()


================================================
FILE: libs/kotaemon/kotaemon/agents/io/__init__.py
================================================
from .base import AgentAction, AgentFinish, AgentOutput, AgentType, BaseScratchPad

__all__ = ["AgentOutput", "AgentFinish", "BaseScratchPad", "AgentType", "AgentAction"]


================================================
FILE: libs/kotaemon/kotaemon/agents/io/base.py
================================================
import json
import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Literal, NamedTuple, Optional, Union

from pydantic import ConfigDict

from kotaemon.base import LLMInterface


def check_log():
    """
    Tell whether logging is switched on via the environment.

    Logging counts as enabled whenever the ``LOG_PATH`` environment variable
    is present, regardless of its value (an empty string still counts).

    :return: True if ``LOG_PATH`` is set, False otherwise.
    :rtype: bool
    """
    return "LOG_PATH" in os.environ


class AgentType(Enum):
    """
    Closed set of agent kinds.

    Each member's value is the same string as its name.
    """

    # OpenAI-flavoured agent types
    openai = "openai"
    openai_multi = "openai_multi"
    openai_tool = "openai_tool"
    # Prompting-strategy agent types
    self_ask = "self_ask"
    react = "react"
    rewoo = "rewoo"
    vanilla = "vanilla"


class BaseScratchPad:
    """
    Minimal scratchpad that records agent output and optionally logs it.

    Items passed to `print` and to non-streaming `panel_print` calls are
    appended to `self.log`. Every call that writes to the logger is gated by
    `check_log()`, so nothing is logged unless that check passes. `stop`,
    `stream_print` and `clear` do nothing in this base class.

    Attributes:
        logger: Logging backend; set to the standard `logging` module.
        log: List of items recorded by `print` and `panel_print`.
    """

    def __init__(self):
        """
        Create an empty record list and bind the logging backend.
        """
        self.log = []
        self.logger = logging

    def stop(self):
        """
        Stop the output (no-op here).
        """

    def update_status(self, output: str, **kwargs):
        """
        Log a status message at INFO level when logging is enabled.
        """
        if not check_log():
            return
        self.logger.info(output)

    def thinking(self, name: str):
        """
        Log that `name` is thinking, when logging is enabled.
        """
        if not check_log():
            return
        self.logger.info(f"{name} is thinking...")

    def done(self, _all=False):
        """
        Log "Done" when logging is enabled. `_all` is not used here.
        """
        if not check_log():
            return
        self.logger.info("Done")

    def stream_print(self, item: str):
        """
        Stream print (no-op here).
        """

    def json_print(self, item: Dict[str, Any]):
        """
        Log `item` as indented JSON when logging is enabled.
        """
        if not check_log():
            return
        self.logger.info(json.dumps(item, indent=2))

    def panel_print(self, item: Any, title: str = "Output", stream: bool = False):
        """
        Record an item and, when logging is enabled, log it between rules.

        Args:
            item : Any
                The item to record / log.
            title : str, optional
                Panel title, defaults to "Output". Not used here.
            stream : bool, optional
                When truthy, the item is not appended to `self.log`.
        """
        if not stream:
            self.log.append(item)
        if not check_log():
            return
        rule = "-" * 20
        self.logger.info(rule)
        self.logger.info(item)
        self.logger.info(rule)

    def clear(self):
        """
        Clear the output (no-op here).
        """

    def print(self, content: str, **kwargs):
        """
        Record `content` and log it at INFO level when logging is enabled.
        """
        self.log.append(content)
        if not check_log():
            return
        self.logger.info(content)

    def format_json(self, json_obj: str):
        """
        Serialize `json_obj` with `json.dumps` using a 2-space indent.
        """
        return json.dumps(json_obj, indent=2)

    def debug(self, content: str, **kwargs):
        """
        Forward to `logger.debug` when logging is enabled.
        """
        if not check_log():
            return
        self.logger.debug(content, **kwargs)

    def info(self, content: str, **kwargs):
        """
        Forward to `logger.info` when logging is enabled.
        """
        if not check_log():
            return
        self.logger.info(content, **kwargs)

    def warning(self, content: str, **kwargs):
        """
        Forward to `logger.warning` when logging is enabled.
        """
        if not check_log():
            return
        self.logger.warning(content, **kwargs)

    def error(self, content: str, **kwargs):
        """
        Forward to `logger.error` when logging is enabled.
        """
        if not check_log():
            return
        self.logger.error(content, **kwargs)

    def critical(self, content: str, **kwargs):
        """
        Forward to `logger.critical` when logging is enabled.
        """
        if not check_log():
            return
        self.logger.critical(content, **kwargs)


@dataclass
class AgentAction:
    """A single tool invocation requested by the agent.

    Args:
        tool: Name of the tool to call.
        tool_input: Input handed to the tool, either a string or a dict.
        log: Log text associated with this action.
    """

    tool: str
    tool_input: Union[str, dict]
    log: str


class AgentFinish(NamedTuple):
    """Terminal result produced when the agent stops.

    Args:
        return_values: Dict holding the agent's final return values.
        log: Log text associated with the final step.
    """

    return_values: dict
    log: str


class AgentOutput(LLMInterface):
    """Result object returned by an agent run.

    Extra, undeclared fields are accepted (``extra="allow"``).

    Args:
        text: Text produced by the agent.
        type: Output kind tag; defaults to "agent".
        agent_type: Which kind of agent produced this output.
        status: Execution state, one of "thinking", "finished", "stopped"
            or "failed".
        error: Error message, if any.
        intermediate_steps: Optional list of intermediate steps.
    """

    model_config = ConfigDict(extra="allow")

    text: str
    type: str = "agent"
    agent_type: AgentType
    status: Literal["thinking", "finished", "stopped", "failed"]
    error: Optional[str] = None
    intermediate_steps: Optional[list] = None


================================================
FILE: libs/kotaemon/kotaemon/agents/langchain_based.py
================================================
from typing import List, Optional

from langchain.agents import AgentType as LCAgentType
from langchain.agents import initialize_agent
from langchain.agents.agent import AgentExecutor as LCAgentExecutor

from kotaemon.llms import LLM, ChatLLM

from .base import BaseAgent
from .io import AgentOutput, AgentType
from .tools import BaseTool


class LangchainAgent(BaseAgent):
    """Wrapper for Langchain Agent.

    Builds a Langchain `AgentExecutor` from this agent's LLM and plugins and
    delegates `run` to it. Only the agent types listed in `AGENT_TYPE_MAP`
    are supported.
    """

    name: str = "LangchainAgent"
    agent_type: AgentType
    description: str = "LangchainAgent for answering multi-step reasoning questions"
    # Supported kotaemon agent types and their Langchain counterparts.
    AGENT_TYPE_MAP = {
        AgentType.openai: LCAgentType.OPENAI_FUNCTIONS,
        AgentType.openai_multi: LCAgentType.OPENAI_MULTI_FUNCTIONS,
        AgentType.react: LCAgentType.ZERO_SHOT_REACT_DESCRIPTION,
        AgentType.self_ask: LCAgentType.SELF_ASK_WITH_SEARCH,
    }
    # Langchain executor; stays None until `update_agent_tools` builds it.
    agent: Optional[LCAgentExecutor] = None

    def __init__(self, *args, **kwargs):
        """Initialize the component and build the Langchain executor.

        Raises:
            NotImplementedError: If `agent_type` is not in `AGENT_TYPE_MAP`.
        """
        super().__init__(*args, **kwargs)

        if self.agent_type not in self.AGENT_TYPE_MAP:
            raise NotImplementedError(
                f"AgentType {self.agent_type} not supported by Langchain wrapper"
            )
        self.update_agent_tools()

    def update_agent_tools(self):
        """(Re)build the Langchain executor from the current LLM and plugins.

        For the self-ask agent type only the plugin named "search_doc" is
        used, renamed to "Intermediate Answer". If no such plugin exists,
        `self.agent` is left unchanged (it stays None if never initialized).
        """
        assert isinstance(self.llm, (ChatLLM, LLM))
        langchain_plugins = [tool.to_langchain_format() for tool in self.plugins]

        if self.agent_type == AgentType.self_ask:
            # a fix for search_doc tool name:
            # use "Intermediate Answer" for self-ask agent
            search_tool = next(
                (plugin for plugin in langchain_plugins if plugin.name == "search_doc"),
                None,
            )
            if search_tool is None:
                # No usable search tool: keep whatever executor we had before.
                return
            search_tool.name = "Intermediate Answer"
            langchain_plugins = [search_tool]

        # reinit Langchain AgentExecutor
        self.agent = initialize_agent(
            langchain_plugins,
            self.llm.to_langchain_format(),
            agent=self.AGENT_TYPE_MAP[self.agent_type],
            handle_parsing_errors=True,
            verbose=True,
        )

    def add_tools(self, tools: List[BaseTool]) -> None:
        """Append `tools` to the plugins and rebuild the Langchain executor."""
        super().add_tools(tools)
        self.update_agent_tools()

    def run(self, instruction: str) -> AgentOutput:
        """Run the Langchain executor on `instruction`.

        Args:
            instruction: The input passed to the Langchain executor.

        Returns:
            An `AgentOutput` with the executor's "output" value as text and
            `status="finished"`.

        Raises:
            AssertionError: If the executor has not been initialized.
        """
        assert (
            self.agent is not None
        ), "Langchain AgentExecutor is not correctly initialized"

        # Langchain AgentExecutor call
        output = self.agent(instruction)["output"]

        return AgentOutput(
            text=output,
            agent_type=self.agent_type,
            status="finished",
        )


================================================
FILE: libs/kotaemon/kotaemon/agents/react/__init__.py
================================================
from .agent import ReactAgent

__all__ = ["ReactAgent"]


================================================
FILE: libs/kotaemon/kotaemon/agents/react/agent.py
================================================
import logging
import re
from functools import partial
from typing import Optional

import tiktoken

from kotaemon.agents.base import BaseAgent, BaseLLM
from kotaemon.agents.io import AgentAction, AgentFinish, AgentOutput, AgentType
from kotaemon.agents.tools import BaseTool
from kotaemon.base import Document, Param
from kotaemon.indices.splitters import TokenSplitter
from kotaemon.llms import PromptTemplate

# Marker searched for in the LLM response to detect (and split off) the final answer.
FINAL_ANSWER_ACTION = "Final Answer:"


class ReactAgent(BaseAgent):
    """
    Sequential ReactAgent class inherited from BaseAgent.
    Implementing ReAct agent paradigm https://arxiv.org/pdf/2210.03629.pdf

    The agent loops: compose a prompt from the instruction and the scratchpad
    of previous steps, call the LLM, parse the response into either a tool
    action or a final answer, run the tool, and repeat until a final answer
    is produced or `max_iterations` is reached.
    """

    name: str = "ReactAgent"
    agent_type: AgentType = AgentType.react
    description: str = "ReactAgent for answering multi-step reasoning questions"
    llm: BaseLLM
    prompt_template: Optional[PromptTemplate] = None
    output_lang: str = "English"
    plugins: list[BaseTool] = Param(
        default_callback=lambda _: [], help="List of tools to be used in the agent. "
    )
    examples: dict[str, str | list[str]] = Param(
        default_callback=lambda _: {}, help="Examples to be used in the agent. "
    )
    intermediate_steps: list[tuple[AgentAction | AgentFinish, str]] = Param(
        default_callback=lambda _: [],
        help="List of AgentAction and observation (tool) output",
    )
    max_iterations: int = 5
    strict_decode: bool = False
    max_context_length: int = Param(
        default=3000,
        help="Max context length for each tool output.",
    )
    trim_func: TokenSplitter | None = None

    def _compose_plugin_description(self) -> str:
        """
        Compose the tool description section of the prompt from the plugins.

        Example:
        toolname1[input]: tool1 description
        toolname2[input]: tool2 description

        Raises:
            ValueError: If building the description fails for any plugin
                (e.g. a missing name or description); the original error is
                chained as the cause.
        """
        try:
            return "".join(
                f"{plugin.name}[input]: {plugin.description}\n"
                for plugin in self.plugins
            )
        except Exception as exc:
            raise ValueError("Worker must have a name and description.") from exc

    def _construct_scratchpad(
        self,
        intermediate_steps: list[tuple[AgentAction | AgentFinish, str]] | None = None,
    ) -> str:
        """Construct the scratchpad that lets the agent continue its thought process.

        Args:
            intermediate_steps: (action, observation) pairs from earlier steps.
                `None` (the default) is treated as no steps.

        Returns:
            The concatenation, per step, of the action log followed by
            "Observation: <observation>" and a trailing "Thought:" cue.
        """
        # `None` instead of a shared mutable `[]` default.
        return "".join(
            f"{action.log}\nObservation: {observation}\nThought:"
            for action, observation in intermediate_steps or []
        )

    def _parse_output(self, text: str) -> Optional[AgentAction | AgentFinish]:
        """
        Parse text output from LLM for the next Action or Final Answer
        Using Regex to parse "Action:\n Action Input:\n" for the next Action
        Using FINAL_ANSWER_ACTION to parse Final Answer

        Args:
            text[str]: input text to parse

        Returns:
            An `AgentAction` when an action is found, otherwise an
            `AgentFinish`. When neither an action nor a final answer is found
            and `strict_decode` is off, the whole text is the final output.

        Raises:
            Exception: If the text contains both an action and a final
                answer, or if nothing can be parsed and `strict_decode` is on.
        """
        includes_answer = FINAL_ANSWER_ACTION in text
        regex = (
            r"Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        )
        action_match = re.search(regex, text, re.DOTALL)
        action_output: Optional[AgentAction | AgentFinish] = None
        if action_match:
            if includes_answer:
                raise Exception(
                    "Parsing LLM output produced both a final answer "
                    f"and a parse-able action: {text}"
                )
            action = action_match.group(1).strip()
            action_input = action_match.group(2)
            tool_input = action_input.strip(" ")
            # ensure if its a well formed SQL query we don't remove any trailing " chars
            if not tool_input.startswith("SELECT "):
                tool_input = tool_input.strip('"')

            action_output = AgentAction(action, tool_input, text)

        elif includes_answer:
            action_output = AgentFinish(
                {"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
            )
        else:
            if self.strict_decode:
                raise Exception(f"Could not parse LLM output: `{text}`")
            else:
                action_output = AgentFinish({"output": text}, text)

        return action_output

    def _compose_prompt(self, instruction) -> str:
        """
        Compose the prompt from template, tool description, scratchpad and
        instruction.

        If no `prompt_template` is set, the default zero-shot ReAct prompt is
        imported and stored on the agent before populating it.
        """
        agent_scratchpad = self._construct_scratchpad(self.intermediate_steps)
        tool_description = self._compose_plugin_description()
        tool_names = ", ".join([plugin.name for plugin in self.plugins])
        if self.prompt_template is None:
            from .prompt import zero_shot_react_prompt

            self.prompt_template = zero_shot_react_prompt
        return self.prompt_template.populate(
            instruction=instruction,
            agent_scratchpad=agent_scratchpad,
            tool_description=tool_description,
            tool_names=tool_names,
            lang=self.output_lang,
        )

    def _format_function_map(self) -> dict[str, BaseTool]:
        """Map each plugin's name to the plugin object.

        Return:
            dict[str, BaseTool]: The function map. If two plugins share a
            name, the later one wins.
        """
        return {plugin.name: plugin for plugin in self.plugins}

    def _invoke_tool(self, action_name: str, tool_input):
        """Call the plugin named `action_name`, or report that it is unknown.

        Returns:
            The plugin's result, or a message listing the available tools when
            no plugin has that name (so the LLM can correct itself).
        """
        function_map = self._format_function_map()
        if action_name not in function_map:
            available = ", ".join(function_map.keys())
            return f"Tool '{action_name}' not found. Available tools: {available}"
        return function_map[action_name](tool_input)

    def _trim(self, text: str | Document) -> str:
        """
        Trim the text to the maximum token length.

        Uses `trim_func` when set; otherwise builds a `TokenSplitter` with
        `chunk_size=max_context_length`. Only the first chunk is kept.

        Raises:
            ValueError: If `text` is neither a `str` nor a `Document`.
        """
        evidence_trim_func = (
            self.trim_func
            if self.trim_func
            else TokenSplitter(
                chunk_size=self.max_context_length,
                chunk_overlap=0,
                separator=" ",
                tokenizer=partial(
                    tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
                    allowed_special=set(),
                    disallowed_special="all",
                ),
            )
        )
        if isinstance(text, str):
            texts = evidence_trim_func([Document(text=text)])
        elif isinstance(text, Document):
            texts = evidence_trim_func([text])
        else:
            raise ValueError("Invalid text type to trim")
        if not texts:
            # Defensive: a splitter that yields no chunks means nothing to keep.
            return ""
        trim_text = texts[0].text
        logging.info(f"len (trimmed): {len(trim_text)}")
        return trim_text

    def clear(self):
        """
        Clear and reset the agent.
        """
        self.intermediate_steps = []

    def run(self, instruction, max_iterations=None) -> AgentOutput:
        """
        Run the agent with the given instruction.

        Args:
            instruction: Instruction to run the agent with.
            max_iterations: Maximum number of iterations
                of reasoning steps, defaults to `self.max_iterations`.

        Return:
            AgentOutput object. `status` is "finished" when a final answer was
            parsed, or "stopped" when the iteration limit was reached first.
            `text` is the last raw LLM response.
        """
        if not max_iterations:
            max_iterations = self.max_iterations
        assert max_iterations > 0

        self.clear()
        logging.info(f"Running {self.name} with instruction: {instruction}")
        # Cost / token counters are reported but not updated in this method.
        total_cost = 0.0
        total_token = 0
        status = "failed"
        response_text = None

        for step_count in range(1, max_iterations + 1):
            prompt = self._compose_prompt(instruction)
            logging.info(f"Prompt: {prompt}")
            response = self.llm(
                prompt, stop=["Observation:"]
            )  # could cause bugs if llm doesn't have `stop` as a parameter
            response_text = response.text
            logging.info(f"Response: {response_text}")
            action_step = self._parse_output(response_text)
            if action_step is None:
                raise ValueError("Invalid action")
            is_finished_chain = isinstance(action_step, AgentFinish)
            if is_finished_chain:
                result = ""
            else:
                assert isinstance(action_step, AgentAction)
                action_name = action_step.tool
                tool_input = action_step.tool_input
                logging.info(f"Action: {action_name}")
                logging.info(f"Tool Input: {tool_input}")
                result = self._invoke_tool(action_name, tool_input)

                # trim each tool output to `max_context_length` tokens, as we
                # are appending all tool outputs to the scratchpad and it can
                # exceed the token limit if we don't limit each. Tune
                # `max_context_length` to the LLM capacity.
                result = self._trim(result)
                logging.info(f"Result: {result}")

            self.intermediate_steps.append((action_step, result))
            if is_finished_chain:
                logging.info(f"Finished after {step_count} steps.")
                status = "finished"
                break
        else:
            status = "stopped"

        return AgentOutput(
            text=response_text,
            agent_type=self.agent_type,
            status=status,
            total_tokens=total_token,
            total_cost=total_cost,
            intermediate_steps=self.intermediate_steps,
            max_iterations=max_iterations,
        )

    def stream(self, instruction, max_iterations=None):
        """
        Stream the agent with the given instruction.

        Args:
            instruction: Instruction to run the agent with.
            max_iterations: Maximum number of iterations
                of reasoning steps, defaults to `self.max_iterations`.

        Yields:
            One AgentOutput per step: status "thinking" (empty text) after
            each tool step, "finished" with the final answer text once a final
            answer is parsed, or "stopped" (empty text) if the iteration limit
            is reached. Each carries only the latest intermediate step.

        Return:
            AgentOutput summarising the whole run (generator return value).
        """
        if not max_iterations:
            max_iterations = self.max_iterations
        assert max_iterations > 0

        self.clear()
        # NOTE(review): every message below is both logged and printed to stdout.
        logging.info(f"Running {self.name} with instruction: {instruction}")
        print(f"Running {self.name} with instruction: {instruction}")
        # Cost / token counters are reported but not updated in this method.
        total_cost = 0.0
        total_token = 0
        status = "failed"
        response_text = None

        for step_count in range(1, max_iterations + 1):
            prompt = self._compose_prompt(instruction)
            logging.info(f"Prompt: {prompt}")
            print(f"Prompt: {prompt}")
            response = self.llm(
                prompt, stop=["Observation:"]
            )  # TODO: could cause bugs if llm doesn't have `stop` as a parameter
            response_text = response.text
            logging.info(f"Response: {response_text}")
            print(f"Response: {response_text}")
            action_step = self._parse_output(response_text)
            if action_step is None:
                raise ValueError("Invalid action")
            is_finished_chain = isinstance(action_step, AgentFinish)
            if is_finished_chain:
                # Keep only what follows the final-answer marker, if present.
                result = response_text
                if FINAL_ANSWER_ACTION in response_text:
                    result = response_text.split(FINAL_ANSWER_ACTION)[-1].strip()
            else:
                assert isinstance(action_step, AgentAction)
                action_name = action_step.tool
                tool_input = action_step.tool_input
                logging.info(f"Action: {action_name}")
                print(f"Action: {action_name}")
                logging.info(f"Tool Input: {tool_input}")
                print(f"Tool Input: {tool_input}")
                result = self._invoke_tool(action_name, tool_input)

                # trim each tool output to `max_context_length` tokens, as we
                # are appending all tool outputs to the scratchpad and it can
                # exceed the token limit if we don't limit each. Tune
                # `max_context_length` to the LLM capacity.
                result = self._trim(result)
                logging.info(f"Result: {result}")
                print(f"Result: {result}")

            self.intermediate_steps.append((action_step, result))
            if is_finished_chain:
                logging.info(f"Finished after {step_count} steps.")
                status = "finished"
                yield AgentOutput(
                    text=result,
                    agent_type=self.agent_type,
                    status=status,
                    intermediate_steps=self.intermediate_steps[-1],
                )
                break
            else:
                yield AgentOutput(
                    text="",
                    agent_type=self.agent_type,
                    status="thinking",
                    intermediate_steps=self.intermediate_steps[-1],
                )

        else:
            status = "stopped"
            yield AgentOutput(
                text="",
                agent_type=self.agent_type,
                status=status,
                intermediate_steps=self.intermediate_steps[-1],
            )

        return AgentOutput(
            text=response_text,
            agent_type=self.agent_type,
            status=status,
            total_tokens=total_token,
            total_cost=total_cost,
            intermediate_steps=self.intermediate_steps,
            max_iterations=max_iterations,
        )


================================================
FILE: libs/kotaemon/kotaemon/agents/react/prompt.py
================================================
# flake8: noqa

from kotaemon.llms import PromptTemplate

# Zero-shot ReAct prompt template.
# Placeholders filled in at populate time: {lang}, {tool_description},
# {tool_names}, {instruction} and {agent_scratchpad}.
# The template text is sent to the LLM verbatim -- edit with care.
zero_shot_react_prompt = PromptTemplate(
    template="""Answer the following questions as best you can. Give answer in {lang}. You have access to the following tools:
{tool_description}
Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do

Action: the action to take, should be one of [{tool_names}]

Action Input: the input to the action, should be different from the action input of the same action in previous steps.

Observation: the result of the action

... (this Thought/Action/Action Input/Observation can repeat N times)
#Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! After each Action Input.

Question: {instruction}
Thought:{agent_scratchpad}
    """
)


================================================
FILE: libs/kotaemon/kotaemon/agents/rewoo/__init__.py
================================================
from .agent import RewooAgent

__all__ = ["RewooAgent"]


================================================
FILE: libs/kotaemon/kotaemon/agents/rewoo/agent.py
================================================
import logging
import re
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from typing import Any

import tiktoken

from kotaemon.agents.base import BaseAgent
from kotaemon.agents.io import AgentOutput, AgentType, BaseScratchPad
from kotaemon.agents.tools import BaseTool
from kotaemon.agents.utils import get_plugin_response_content
from kotaemon.base import Document, Node, Param
from kotaemon.indices.qa.citation import CitationPipeline
from kotaemon.indices.splitters import TokenSplitter
from kotaemon.llms import BaseLLM, PromptTemplate

from .planner import Planner
from .solver import Solver


class RewooAgent(BaseAgent):
    """Distributive RewooAgent class inherited from BaseAgent.
    Implementing ReWOO paradigm https://arxiv.org/pdf/2305.18323.pdf"""

    name: str = "RewooAgent"
    agent_type: AgentType = AgentType.rewoo
    description: str = "RewooAgent for answering multi-step reasoning questions"
    output_lang: str = "English"
    planner_llm: BaseLLM
    solver_llm: BaseLLM
    prompt_template: dict[str, PromptTemplate] = Param(
        default_callback=lambda _: {},
        help="A dict to supply different prompt to the agent.",
    )
    plugins: list[BaseTool] = Param(
        default_callback=lambda _: [], help="A list of plugins to be used in the model."
    )
    examples: dict[str, str | list[str]] = Param(
        default_callback=lambda _: {}, help="Examples to be used in the agent."
    )
    max_context_length: int = Param(
        default=3000,
        help="Max context length for each tool output.",
    )
    trim_func: TokenSplitter | None = None

    @Node.auto(depends_on=["planner_llm", "plugins", "prompt_template", "examples"])
    def planner(self):
        """Build the planning component from the agent's current settings.

        The custom prompt and the few-shot examples are looked up under the
        ``"Planner"`` key of ``prompt_template`` / ``examples``; a missing key
        is passed on as ``None``.
        """
        planner_prompt = self.prompt_template.get("Planner", None)
        planner_examples = self.examples.get("Planner", None)
        return Planner(
            model=self.planner_llm,
            plugins=self.plugins,
            prompt_template=planner_prompt,
            examples=planner_examples,
        )

    @Node.auto(depends_on=["solver_llm", "prompt_template", "examples"])
    def solver(self):
        """Build the solving component from the agent's current settings.

        The custom prompt and the few-shot examples are looked up under the
        ``"Solver"`` key of ``prompt_template`` / ``examples``; a missing key
        is passed on as ``None``. The agent's ``output_lang`` is forwarded.
        """
        solver_prompt = self.prompt_template.get("Solver", None)
        solver_examples = self.examples.get("Solver", None)
        return Solver(
            model=self.solver_llm,
            prompt_template=solver_prompt,
            examples=solver_examples,
            output_lang=self.output_lang,
        )

    def _parse_plan_map(
        self, planner_response: str
    ) -> tuple[dict[str, list[str]], dict[str, str]]:
        """
        Parse planner output. It should be an n-to-n mapping from Plans to #Es.
        This is because sometimes LLM cannot follow the strict output format.
        Example:
            #Plan1
            #E1
            #E2
        should result in: {"#Plan1": ["#E1", "#E2"]}
        Or:
            #Plan1
            #Plan2
            #E1
        should result in: {"#Plan1": [], "#Plan2": ["#E1"]}
        This function should also return a plan map.

        Returns:
            tuple[Dict[str, List[str]], Dict[str, str]]: A list of plan map
        """
        valid_chunk = [
            line
            for line in planner_response.splitlines()
            if line.startswith("#Plan") or line.startswith("#E")
        ]

        plan_to_es: dict[str, list[str]] = dict()
        plans: dict[str, str] = dict()
        prev_key = ""
        for line in valid_chunk:
            key, description = line.split(":", 1)
            key = key.strip()
            if key.startswith("#Plan"):
                plans[key] = description.strip()
                plan_to_es[key] = []
                prev_key = key
            elif key.startswith("#E"):
                plan_to_es[prev_key].append(key)

        return plan_to_es, plans

    def _parse_planner_evidences(
        self, planner_response: str
    ) -> tuple[dict[str, str], list[list[str]]]:
        """
        Parse planner output. This should return a mapping from #E to tool call.
        It should also identify the level of each #E in dependency map.
        Example:
            {
            "#E1": "Tool1", "#E2": "Tool2",
            "#E3": "Tool3", "#E4": "Tool4"
            }, [[#E1, #E2], [#E3, #E4]]

        Returns:
            tuple[dict[str, str], List[List[str]]]:
            A mapping from #E to tool call and a list of levels.
        """
        evidences: dict[str, str] = dict()
        dependence: dict[str, list[str]] = dict()
        for line in planner_response.splitlines():
            if line.startswith("#E") and line[2].isdigit():
                e, tool_call = line.split(":", 1)
                e, tool_call = e.strip(), tool_call.strip()
                if len(e) == 3:
                    dependence[e] = []
                    evidences[e] = tool_call
                    for var in re.findall(r"#E\d+", tool_call):
                        if var in evidences:
                            dependence[e].append(var)
                else:
                    evidences[e] = "No evidence found"
        level = []
        while dependence:
            select = [i for i in dependence if not dependence[i]]
            if len(select) == 0:
                raise ValueError("Circular dependency detected.")
            level.append(select)
            for item in select:
                dependence.pop(item)
            for item in dependence:
                for i in select:
                    if i in dependence[item]:
                        dependence[item].remove(i)

        return evidences, level

    def _run_plugin(
        self,
        e: str,
        planner_evidences: dict[str, str],
        worker_evidences: dict[str, str],
        output=BaseScratchPad(),
    ):
        """Execute the tool call planned for evidence ``e``.

        The planned call has the form ``ToolName[input]``. ``#E<n>`` variables
        inside the input are replaced by the evidence already collected for
        them; variables without collected evidence are left untouched. A plan
        entry without ``[`` is not a tool call and is returned verbatim as the
        evidence.

        Args:
            e: Evidence key to resolve (e.g. ``"#E1"``).
            planner_evidences: Mapping from evidence key to planned tool call.
            worker_evidences: Evidence collected so far, used for substitution.
            output: Scratchpad used to print the tool response.

        Returns:
            A dict with keys ``e``, ``plugin_cost``, ``plugin_token`` and
            ``evidence``. Cost and token counts are always reported as 0 here.
            The evidence is ``"No evidence found."`` when the tool is unknown
            or the tool raises ``ValueError``.
        """
        result = dict(e=e, plugin_cost=0, plugin_token=0, evidence="")
        tool_call = planner_evidences[e]
        if "[" not in tool_call:
            result["evidence"] = tool_call
            return result

        tool, tool_input = tool_call.split("[", 1)
        # Tolerate "Tool [input]": plugin names are matched exactly.
        tool = tool.strip()
        # Keep everything up to the LAST closing bracket. When the bracket is
        # missing the input is kept whole instead of losing its last character.
        if "]" in tool_input:
            tool_input = tool_input.rsplit("]", 1)[0]

        def _substitute(match):
            var = match.group(0)
            if var not in worker_evidences:
                return var
            return worker_evidences[var] or ""

        # A single regex pass replaces whole variables only, so substituting
        # "#E1" can never corrupt "#E10".
        tool_input = re.sub(r"#E\d+", _substitute, tool_input)
        logging.debug("Resolved input of %s for %s: %s", tool, e, tool_input)

        try:
            selected_plugin = self._find_plugin(tool)
            if selected_plugin is None:
                raise ValueError("Invalid plugin detected")
            tool_response = selected_plugin(tool_input)
            result["evidence"] = get_plugin_response_content(tool_response)
        except ValueError:
            result["evidence"] = "No evidence found."
        finally:
            output.panel_print(
                result["evidence"], f"[green] Function Response of [blue]{tool}: "
            )
        return result

    def _get_worker_evidence(
        self,
        planner_evidences: dict[str, str],
        evidences_level: list[list[str]],
        output=BaseScratchPad(),
    ) -> Any:
        """Run the planned tool calls level by level on a thread pool.

        All evidences of one level are submitted together; their results are
        collected (and trimmed) before the next level is submitted, so later
        levels can substitute the evidence of earlier ones.

        Args:
            planner_evidences: A mapping from #E to tool call.
            evidences_level: A list of levels of evidences.
                Calculated from DAG of plugin calls.
            output: Scratchpad receiving status updates.

        Returns:
            A tuple ``(worker_evidences, plugin_cost, plugin_token)``: the
            mapping from #E to its trimmed evidence, and the summed cost and
            token counts reported by the plugin runs.
        """
        collected: dict[str, str] = dict()
        total_cost, total_token = 0.0, 0.0
        with ThreadPoolExecutor() as executor:
            for batch in evidences_level:
                futures = [
                    executor.submit(
                        self._run_plugin,
                        name,
                        planner_evidences,
                        collected,
                        output,
                    )
                    for name in batch
                ]
                if len(futures) > 1:
                    status = f"Running tasks {batch} in parallel."
                else:
                    status = f"Running task {batch[0]}."
                output.update_status(status)
                # Results are consumed in submission order.
                for future in futures:
                    outcome = future.result()
                    total_cost += outcome["plugin_cost"]
                    total_token += outcome["plugin_token"]
                    collected[outcome["e"]] = self._trim_evidence(outcome["evidence"])
                output.done()

        return collected, total_cost, total_token

    def _find_plugin(self, name: str):
        for p in self.plugins:
            if p.name == name:
                return p

    def _trim_evidence(self, evidence: str):
        evidence_trim_func = (
            self.trim_func
            if self.trim_func
            else TokenSplitter(
                chunk_size=self.max_context_length,
                chunk_overlap=0,
                separator=" ",
                tokenizer=partial(
                    tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
                    allowed_special=set(),
                    disallowed_special="all",
                ),
            )
        )
        if evidence:
            texts = evidence_trim_func([Document(text=evidence)])
            evidence = texts[0].text
            logging.info(f"len (trimmed): {len(evidence)}")
            return evidence

    @BaseAgent.safeguard_run
    def run(self, instruction: str, use_citation: bool = False) -> AgentOutput:
        """Answer *instruction* with the plan / work / solve pipeline.

        1. Plan: the planner LLM produces ``#Plan`` / ``#E`` lines.
        2. Work: the planned tool calls are executed.
        3. Solve: the solver LLM answers from the resulting worker log.

        Args:
            instruction: The question or task to solve.
            use_citation: When true, a ``CitationPipeline`` is run on the
                worker log with the solver LLM.

        Returns:
            An ``AgentOutput`` with status ``"finished"``. ``total_tokens`` and
            ``total_cost`` are the initial zeros; no usage is accumulated here.
        """
        logging.info(f"Running {self.name} with instruction: {instruction}")
        total_cost = 0.0
        total_token = 0

        # Plan
        planner_output = self.planner(instruction)
        planner_text_output = planner_output.text
        plan_to_es, plans = self._parse_plan_map(planner_text_output)
        planner_evidences, evidence_level = self._parse_planner_evidences(
            planner_text_output
        )

        # Work
        worker_evidences, plugin_cost, plugin_token = self._get_worker_evidence(
            planner_evidences, evidence_level
        )
        log_lines = []
        for plan, evidence_keys in plan_to_es.items():
            log_lines.append(f"{plan}: {plans[plan]}\n")
            for e in evidence_keys:
                # The plan map can list keys that were never scheduled for
                # execution; report them as missing instead of raising KeyError.
                evidence = worker_evidences.get(e, "No evidence found")
                log_lines.append(f"{e}: {evidence}\n")
        worker_log = "".join(log_lines)

        # Solve
        solver_output = self.solver(instruction, worker_log)
        solver_output_text = solver_output.text
        if use_citation:
            citation_pipeline = CitationPipeline(llm=self.solver_llm)
            citation = citation_pipeline(context=worker_log, question=instruction)
        else:
            citation = None

        return AgentOutput(
            text=solver_output_text,
            agent_type=self.agent_type,
            status="finished",
            total_tokens=total_token,
            total_cost=total_cost,
            citation=citation,
            metadata={"citation": citation, "worker_log": worker_log},
        )

    def stream(self, instruction: str, use_citation: bool = False):
        """Generator variant of the plan / work / solve pipeline.

        Yields ``AgentOutput`` objects with status ``"thinking"``:
        - one carrying the planner text in ``intermediate_steps``;
        - one per plan carrying that plan's actions and evidences;
        - one per chunk streamed by the solver (in ``text``).

        The final ``AgentOutput`` (status ``"finished"``, empty ``text``,
        citation and worker log in ``metadata``) is the generator's *return*
        value, so it is only reachable through ``StopIteration.value`` or
        ``yield from``.

        Args:
            instruction: The question or task to solve.
            use_citation: When true, a ``CitationPipeline`` is run on the
                worker log with the solver LLM.
        """
        logging.info(f"Streaming {self.name} with instruction: {instruction}")
        total_cost = 0.0
        total_token = 0

        # Plan
        planner_output = self.planner(instruction)
        planner_text_output = planner_output.text
        plan_to_es, plans = self._parse_plan_map(planner_text_output)
        planner_evidences, evidence_level = self._parse_planner_evidences(
            planner_text_output
        )

        logging.debug("Planner output: %s", planner_text_output)
        # output planner to info panel
        yield AgentOutput(
            text="",
            agent_type=self.agent_type,
            status="thinking",
            intermediate_steps=[{"planner_log": planner_text_output}],
        )

        # Work
        worker_evidences, plugin_cost, plugin_token = self._get_worker_evidence(
            planner_evidences, evidence_level
        )
        worker_log = ""
        for plan in plan_to_es:
            current_progress = f"{plan}: {plans[plan]}\n"
            for e in plan_to_es[plan]:
                # The plan map can list keys that were never scheduled for
                # execution; report them as missing instead of raising KeyError.
                evidence = worker_evidences.get(e, "No evidence found")
                current_progress += f"#Action: {planner_evidences.get(e, None)}\n"
                current_progress += f"{e}: {evidence}\n"
            # The full log is the concatenation of the per-plan progress texts.
            worker_log += current_progress

            yield AgentOutput(
                text="",
                agent_type=self.agent_type,
                status="thinking",
                intermediate_steps=[{"worker_log": current_progress}],
            )

        # Solve
        for solver_output in self.solver.stream(instruction, worker_log):
            yield AgentOutput(
                text=solver_output.text,
                agent_type=self.agent_type,
                status="thinking",
            )
        if use_citation:
            citation_pipeline = CitationPipeline(llm=self.solver_llm)
            citation = citation_pipeline.invoke(
                context=worker_log, question=instruction
            )
        else:
            citation = None

        return AgentOutput(
            text="",
            agent_type=self.agent_type,
            status="finished",
            total_tokens=total_token,
            total_cost=total_cost,
            citation=citation,
            metadata={"citation": citation, "worker_log": worker_log},
        )


================================================
FILE: libs/kotaemon/kotaemon/agents/rewoo/planner.py
================================================
from typing import Any, List, Optional, Union

from kotaemon.agents.base import BaseLLM, BaseTool
from kotaemon.agents.io import BaseScratchPad
from kotaemon.base import BaseComponent
from kotaemon.llms import PromptTemplate

from .prompt import few_shot_planner_prompt, zero_shot_planner_prompt


class Planner(BaseComponent):
    model: BaseLLM
    prompt_template: Optional[PromptTemplate] = None
    examples: Optional[Union[str, List[str]]] = None
    plugins: List[BaseTool]

    def _compose_worker_description(self) -> str:
        """
        Compose the worker prompt from the workers.

        Example:
        toolname1[input]: tool1 description
        toolname2[input]: tool2 description
        """
        prompt = ""
        try:
            for worker in self.plugins:
                prompt += f"{worker.name}[input]: {worker.description}\n"
        except Exception:
            raise ValueError("Worker must have a name and description.")
        return prompt

    def _compose_fewshot_prompt(self) -> str:
        if self.examples is None:
            return ""
        if isinstance(self.examples, str):
            return self.examples
        else:
            return "\n\n".join([e.strip("\n") for e in self.examples])

    def _compose_prompt(self, instruction) -> str:
        """
        Compose the prompt from template, worker description, examples and instruction.
        """
        worker_desctription = self._compose_worker_description()
        fewshot = self._compose_fewshot_prompt()
        if self.prompt_template is not None:
            if "fewshot" in self.prompt_template.placeholders:
                return self.prompt_template.populate(
                    tool_description=worker_desctription,
                    fewshot=fewshot,
                    task=instruction,
                )
            else:
                return self.prompt_template.populate(
                    tool_description=worker_desctription, task=instruction
                )
        else:
            if self.examples is not None:
                return few_shot_planner_prompt.populate(
                    tool_description=worker_desctription,
                    fewshot=fewshot,
                    task=instruction,
                )
            else:
                return zero_shot_planner_prompt.populate(
                    tool_description=worker_desctription, task=instruction
                )

    def run(self, instruction: str, output: BaseScratchPad = BaseScratchPad()) -> Any:
        """Ask the planner LLM for a plan and return its raw response.

        Args:
            instruction: The task to plan for.
            output: Scratchpad used for logging. The default instance is
                created once, when the method is defined.

        Returns:
            Whatever the model returns for the composed prompt.

        Raises:
            ValueError: If a ``ValueError`` is raised while calling the model
                or logging its response; the original error is chained.
        """
        failure_message = "Planner failed to retrieve response from LLM"
        output.info("Running Planner")
        prompt = self._compose_prompt(instruction)
        output.debug(f"Prompt: {prompt}")
        try:
            llm_response = self.model(prompt)
            self.log_progress(".planner", response=llm_response)
            output.info("Planner run successful.")
        except ValueError as err:
            output.error(failure_message)
            raise ValueError(failure_message) from err

        return llm_response

    def stream(self, instruction: str, output: BaseScratchPad = BaseScratchPad()):
        """Stream the planner LLM's answer chunk by chunk.

        Each item produced by ``model.stream`` is yielded as-is and appended to
        the accumulated response, which is the generator's return value. When
        the model raises ``NotImplementedError`` the model is called once and
        its whole response is yielded instead.

        NOTE(review): chunks are concatenated onto a ``str`` with ``+=``; this
        presumably requires the streamed items to support being added to a
        string. Confirm against the LLM interface.

        Raises:
            ValueError: If a ``ValueError`` is raised while streaming; the
                original error is chained.
        """
        output.info("Running Planner")
        prompt = self._compose_prompt(instruction)
        output.debug(f"Prompt: {prompt}")

        accumulated = ""
        try:
            for chunk in self.model.stream(prompt):
                accumulated += chunk
                yield chunk
            self.log_progress(".planner", response=accumulated)
            output.info("Planner run successful.")
        except NotImplementedError:
            print("Streaming is not supported, falling back to normal run")
            accumulated = self.model(prompt)
            yield accumulated
        except ValueError as err:
            output.error("Planner failed to retrieve response from LLM")
            raise ValueError("Planner failed to retrieve response from LLM") from err

        return accumulated


================================================
FILE: libs/kotaemon/kotaemon/agents/rewoo/prompt.py
================================================
# flake8: noqa

from kotaemon.llms import PromptTemplate

# Planner prompt without examples. Placeholders: {tool_description}, {task}.
# The "#Plan<n>: ..." / "#E<n>: <toolname>[<input>]" output format described in
# the text is the format the ReWOO agent's planner-output parsers look for.
zero_shot_planner_prompt = PromptTemplate(
    template="""You are an AI agent who makes step-by-step plans to solve a problem under the help of external tools.
For each step, make one plan followed by one tool-call, which will be executed later to retrieve evidence for that step.
You should store each evidence into a distinct variable #E1, #E2, #E3 ... that can be referred to in later tool-call inputs.

##Available Tools##
{tool_description}

##Output Format (Replace '<...>')##
#Plan1: <describe your plan here>
#E1: <toolname>[<input here>] (eg. Search[What is Python])
#Plan2: <describe next plan>
#E2: <toolname>[<input here, you can use #E1 to represent its expected output>]
And so on...

##Your Task##
{task}

##Now Begin##
"""
)

# Planner prompt with one built-in worked example (a Calculator task).
# Placeholders: {tool_description}, {task}.
# NOTE(review): the planner module in this package imports only the zero-shot
# and few-shot planner prompts; check for other users before removing this one.
one_shot_planner_prompt = PromptTemplate(
    template="""You are an AI agent who makes step-by-step plans to solve a problem under the help of external tools.
For each step, make one plan followed by one tool-call, which will be executed later to retrieve evidence for that step.
You should store each evidence into a distinct variable #E1, #E2, #E3 ... that can be referred to in later tool-call inputs.

##Available Tools##
{tool_description}

##Output Format##
#Plan1: <describe your plan here>
#E1: <toolname>[<input here>]
#Plan2: <describe next plan>
#E2: <toolname>[<input here, you can use #E1 to represent its expected output>]
And so on...

##Example##
Task: What is the 4th root of 64 to the power of 3?
#Plan1: Find the 4th root of 64
#E1: Calculator[64^(1/4)]
#Plan2: Raise the result from #Plan1 to the power of 3
#E2: Calculator[#E1^3]

##Your Task##
{task}

##Now Begin##
"""
)


# Planner prompt with caller-supplied examples.
# Placeholders: {tool_description}, {fewshot}, {task}.
few_shot_planner_prompt = PromptTemplate(
    template="""You are an AI agent who makes step-by-step plans to solve a problem under the help of external tools.
For each step, make one plan followed by one tool-call, which will be executed later to retrieve evidence for that step.
You should store each evidence into a distinct variable #E1, #E2, #E3 ... that can be referred to in later tool-call inputs.

##Available Tools##
{tool_description}

##Output Format (Replace '<...>')##
#Plan1: <describe your plan here>
#E1: <toolname>[<input>]
#Plan2: <describe next plan>
#E2: <toolname>[<input, you can use #E1 to represent its expected output>]
And so on...

##Examples##
{fewshot}

##Your Task##
{task}

##Now Begin##
"""
)

# Solver prompt without examples. Placeholders: {lang}, {plan_evidence}, {task}.
zero_shot_solver_prompt = PromptTemplate(
    template="""You are an AI agent who solves a problem with my assistance. I will provide step-by-step plans(#Plan) and evidences(#E) that could be helpful.
Your task is to briefly summarize each step, then make a short final conclusion for your task. Give answer in {lang}.

##My Plans and Evidences##
{plan_evidence}

##Example Output##
First, I <did something> , and I think <...>; Second, I <...>, and I think <...>; ....
So, <your conclusion>.

##Your Task##
{task}

##Now Begin##
"""
)

# Solver prompt with caller-supplied examples.
# Placeholders: {lang}, {plan_evidence}, {fewshot}, {task}.
few_shot_solver_prompt = PromptTemplate(
    template="""You are an AI agent who solves a problem with my assistance. I will provide step-by-step plans and evidences that could be helpful.
Your task is to briefly summarize each step, then make a short final conclusion for your task. Give answer in {lang}.

##My Plans and Evidences##
{plan_evidence}

##Example Output##
First, I <did something> , and I think <...>; Second, I <...>, and I think <...>; ....
So, <your conclusion>.

##Example##
{fewshot}

##Your Task##
{task}

##Now Begin##
"""
)


================================================
FILE: libs/kotaemon/kotaemon/agents/rewoo/solver.py
================================================
from typing import Any, List, Optional, Union

from kotaemon.agents.io import BaseScratchPad
from kotaemon.base import BaseComponent
from kotaemon.llms import BaseLLM, PromptTemplate

from .prompt import few_shot_solver_prompt, zero_shot_solver_prompt


class Solver(BaseComponent):
    model: BaseLLM
    prompt_template: Optional[PromptTemplate] = None
    examples: Optional[Union[str, List[str]]] = None
    output_lang: str = "English"

    def _compose_fewshot_prompt(self) -> str:
        if self.examples is None:
            return ""
        if isinstance(self.examples, str):
            return self.examples
        else:
            return "\n\n".join([e.strip("\n") for e in self.examples])

    def _compose_prompt(self, instruction, plan_evidence, output_lang) -> str:
        """
        Compose the prompt from template, plan&evidence, examples and instruction.
        """
        fewshot = self._compose_fewshot_prompt()
        if self.prompt_template is not None:
            if "fewshot" in self.prompt_template.placeholders:
                return self.prompt_template.populate(
                    plan_evidence=plan_evidence,
                    fewshot=fewshot,
                    task=instruction,
                    lang=output_lang,
                )
            else:
                return self.prompt_template.populate(
                    plan_evidence=plan_evidence, task=instruction, lang=output_lang
                )
        else:
            if self.examples is not None:
                return few_shot_solver_prompt.populate(
                    plan_evidence=plan_evidence,
                    fewshot=fewshot,
                    task=instruction,
                    lang=output_lang,
                )
            else:
                return zero_shot_solver_prompt.populate(
                    plan_evidence=plan_evidence,
                    task=instruction,
                    lang=output_lang,
                )

    def run(
        self,
        instruction: str,
        plan_evidence: str,
        output: BaseScratchPad = BaseScratchPad(),
    ) -> Any:
        """Ask the solver LLM for the final answer.

        Args:
            instruction: The original task.
            plan_evidence: The plans and collected evidences, as text.
            output: Scratchpad used for logging. The default instance is
                created once, when the method is defined.

        Returns:
            The model's response, or ``None`` when a ``ValueError`` was raised
            (the failure is reported through ``output.error`` only).
        """
        output.info("Running Solver")
        output.debug(f"Instruction: {instruction}")
        output.debug(f"Plan Evidence: {plan_evidence}")
        prompt = self._compose_prompt(instruction, plan_evidence, self.output_lang)
        output.debug(f"Prompt: {prompt}")

        answer = None
        try:
            answer = self.model(prompt)
            output.info("Solver run successful.")
        except ValueError:
            output.error("Solver failed to retrieve response from LLM")
        return answer

    def stream(
        self,
        instruction: str,
        plan_evidence: str,
        output: BaseScratchPad = BaseScratchPad(),
    ) -> Any:
        """Stream the solver LLM's final answer.

        Every item produced by ``model.stream`` is yielded as-is; its ``text``
        is appended to the accumulated answer, which is the generator's return
        value.

        When the model raises ``NotImplementedError`` the model is called once
        and the complete response is yielded as a single chunk, so consumers
        iterating over the generator still receive the answer.

        A ``ValueError`` is reported through ``output.error`` and ends the
        stream without re-raising.

        Args:
            instruction: The original task.
            plan_evidence: The plans and collected evidences, as text.
            output: Scratchpad used for logging. The default instance is
                created once, when the method is defined.
        """
        response = ""
        output.info("Running Solver")
        output.debug(f"Instruction: {instruction}")
        output.debug(f"Plan Evidence: {plan_evidence}")
        prompt = self._compose_prompt(instruction, plan_evidence, self.output_lang)
        output.debug(f"Prompt: {prompt}")
        try:
            for text in self.model.stream(prompt):
                response += text.text
                yield text
            output.info("Solver run successful.")
        except NotImplementedError:
            # Non-streaming fallback: emit the whole answer as one chunk.
            fallback = self.model(prompt)
            response = fallback.text
            yield fallback
            output.info("Solver run successful.")
        except ValueError:
            output.error("Solver failed to retrieve response from LLM")

        return response


================================================
FILE: libs/kotaemon/kotaemon/agents/tools/__init__.py
================================================
from .base import BaseTool, ComponentTool
from .google import GoogleSearchTool
from .llm import LLMTool
from .mcp import (
    MCPTool,
    build_args_model,
    create_tools_from_config,
    discover_tools_info,
    format_tool_list,
    parse_mcp_config,
)
from .wikipedia import WikipediaTool

__all__ = [
    "BaseTool",
    "ComponentTool",
    "GoogleSearchTool",
    "WikipediaTool",
    "LLMTool",
    "MCPTool",
    "build_args_model",
    "create_tools_from_config",
    "discover_tools_info",
    "format_tool_list",
    "parse_mcp_config",
]


================================================
FILE: libs/kotaemon/kotaemon/agents/tools/base.py
================================================
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union

from langchain.agents import Tool as LCTool
from pydantic import BaseModel

from kotaemon.base import BaseComponent


class ToolException(Exception):
    """Optional exception a tool raises to report an execution error.

    ``BaseTool.run`` catches this exception and delegates to
    ``BaseTool._handle_tool_error``, which decides from the tool's
    ``handle_tool_error`` setting whether to re-raise it or to turn it into
    an observation that is returned to the caller in place of a result.
    """


class BaseTool(BaseComponent):
    name: str
    """The unique name of the tool that clearly communicates its purpose."""
    description: str
    """Description used to tell the model how/when/why to use the tool.
    You can provide few-shot examples as a part of the description. This will be
    input to the prompt of LLM.
    """
    args_schema: Optional[Type[BaseModel]] = None
    """Pydantic model class to validate and parse the tool's input arguments."""
    verbose: bool = False
    """Whether to log the tool's progress."""
    handle_tool_error: Optional[
        Union[bool, str, Callable[[ToolException], str]]
    ] = False
    """Handle the content of the ToolException thrown."""

    def _parse_input(
        self,
        tool_input: Union[str, Dict],
    ) -> Union[str, Dict[str, Any]]:
        """Convert tool input to pydantic model."""
        args_schema = self.args_schema
        if isinstance(tool_input, str):
            if args_schema is not None:
                key_ = next(iter(args_schema.model_fields.keys()))
                args_schema.validate({key_: tool_input})
            return tool_input
        else:
            if args_schema is not None:
                result = args_schema.parse_obj(tool_input)
                return {k: v for k, v in result.dict().items() if k in tool_input}
        return tool_input

    def _run_tool(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Call tool."""
        raise NotImplementedError(f"_run_tool is not implemented for {self.name}")

    def _to_args_and_kwargs(self, tool_input: Union[str, Dict]) -> Tuple[Tuple, Dict]:
        # For backwards compatibility, if run_input is a string,
        # pass as a positional argument.
        if isinstance(tool_input, str):
            return (tool_input,), {}
        else:
            return (), tool_input

    def _handle_tool_error(self, e: ToolException) -> Any:
        """Turn a ``ToolException`` into an observation, or re-raise it.

        The outcome depends on ``handle_tool_error``:
        - falsy (``False``, ``None``, ``""``): *e* is re-raised;
        - ``True``: the exception's first argument, or
          ``"Tool execution error"`` when it has none;
        - a non-empty string: that string;
        - a callable: its return value for *e*.

        Raises:
            ToolException: When error handling is disabled.
            ValueError: When ``handle_tool_error`` has an unsupported type.
        """
        handler = self.handle_tool_error
        if not handler:
            raise e
        if isinstance(handler, bool):
            return e.args[0] if e.args else "Tool execution error"
        if isinstance(handler, str):
            return handler
        if callable(handler):
            return handler(e)
        raise ValueError(
            f"Got unexpected type of `handle_tool_error`. Expected bool, str "
            f"or callable. Received: {self.handle_tool_error}"
        )

    def to_langchain_format(self) -> LCTool:
        """Wrap this tool in a Langchain ``Tool`` that calls :meth:`run`."""
        langchain_tool = LCTool(
            name=self.name,
            description=self.description,
            func=self.run,
        )
        return langchain_tool

    def run(
        self,
        tool_input: Union[str, Dict],
        verbose: Optional[bool] = None,
        **kwargs: Any,
    ) -> Any:
        """Validate the input, run the tool and return its observation.

        Input validation happens outside the ``try`` block, so validation
        errors always propagate. Keyword arguments derived from *tool_input*
        override same-named entries of ``kwargs``. A ``ToolException`` raised
        while running is passed to ``_handle_tool_error``.

        Args:
            tool_input: A plain string or a dict of keyword arguments.
            verbose: Currently unused.
            **kwargs: Extra keyword arguments forwarded to ``_run_tool``.
        """
        parsed_input = self._parse_input(tool_input)
        # TODO (verbose_): Add logging
        try:
            positional, named = self._to_args_and_kwargs(parsed_input)
            return self._run_tool(*positional, **{**kwargs, **named})
        except ToolException as e:
            return self._handle_tool_error(e)

    @classmethod
    def from_langchain_format(cls, langchain_tool: LCTool) -> "BaseTool":
        """Wrap a Langchain tool as a ``BaseTool``.

        The result is always a plain ``BaseTool`` (``cls`` is not used) that
        copies the Langchain tool's name and description and delegates
        ``_run_tool`` to the Langchain tool's ``_run``.
        """
        wrapped = BaseTool(
            name=langchain_tool.name,
            description=langchain_tool.description,
        )
        wrapped._run_tool = langchain_tool._run  # type: ignore
        return wrapped


class ComponentTool(BaseTool):
    """Expose any BaseComponent as a tool.

    Args:
        component: BaseComponent-based component that is called with the
            tool's arguments
        postprocessor: Optional callable applied to the component's output
    """

    component: BaseComponent
    postprocessor: Optional[Callable] = None

    def _run_tool(self, *args: Any, **kwargs: Any) -> Any:
        """Call the wrapped component and post-process its result if asked."""
        result = self.component(*args, **kwargs)
        return self.postprocessor(result) if self.postprocessor else result


================================================
FILE: libs/kotaemon/kotaemon/agents/tools/google.py
================================================
from typing import AnyStr, Optional, Type
from urllib.error import HTTPError

from langchain_community.utilities import SerpAPIWrapper
from pydantic import BaseModel, Field

from .base import BaseTool


# Argument schema shared by the Google search tools in this module: a single
# required ``query`` string. Documented with a comment rather than a docstring
# so the schema generated from the model is left untouched.
class GoogleSearchArgs(BaseModel):
    query: str = Field(..., description="a search query")


class GoogleSearchTool(BaseTool):
    # Search tool backed by the optional ``googlesearch`` package, which is
    # imported lazily on first use.
    name: str = "google_search"
    description: str = (
        "A search engine retrieving top search results as snippets from Google. "
        "Input should be a search query."
    )
    args_schema: Optional[Type[BaseModel]] = GoogleSearchArgs

    def _run_tool(self, query: AnyStr) -> str:
        """Search for *query* and return one "title description" line per hit.

        Returns an empty string when there are no results and
        ``"No evidence found."`` when the search raises ``HTTPError``.

        Raises:
            ImportError: If the ``googlesearch`` package is not installed.
        """
        try:
            from googlesearch import search
        except ImportError:
            raise ImportError(
                "install googlesearch using `pip3 install googlesearch-python` to "
                "use this tool"
            )

        output = ""
        try:
            search_results = search(query, advanced=True)
            if search_results:
                # The results are consumed inside the try block so that errors
                # raised while iterating are handled as well.
                snippets = (
                    f"{item.title} {item.description}" for item in search_results
                )
                output = "\n".join(snippets)
        except HTTPError:
            output = "No evidence found."

        return output


class SerpTool(BaseTool):
    """Tool answering search queries through `SerpAPIWrapper`."""

    name = "google_search"
    description = (
        "Worker that searches results from Google. Useful when you need to find short "
        "and succinct answers about a specific topic. Input should be a search query."
    )
    args_schema: Optional[Type[BaseModel]] = GoogleSearchArgs

    def _run_tool(self, query: AnyStr) -> str:
        """Run `query` through a fresh `SerpAPIWrapper` and return its answer."""
        return SerpAPIWrapper().run(query)


================================================
FILE: libs/kotaemon/kotaemon/agents/tools/llm.py
================================================
from typing import AnyStr, Optional, Type

from pydantic import BaseModel, Field

from kotaemon.agents.tools.base import ToolException
from kotaemon.llms import BaseLLM

from .base import BaseTool


# Argument schema used by `LLMTool`: one required `query` string.
class LLMArgs(BaseModel):
    # `...` marks the field as required
    query: str = Field(..., description="a search question or prompt")


class LLMTool(BaseTool):
    """Tool that forwards the query to an LLM and returns the generated text."""

    name: str = "llm"
    description: str = (
        "A pretrained LLM like yourself. Useful when you need to act with "
        "general world knowledge and common sense. Prioritize it when you "
        "are confident in solving the problem "
        "yourself. Input can be any instruction."
    )
    llm: BaseLLM
    args_schema: Optional[Type[BaseModel]] = LLMArgs
    dummy_mode: bool = True

    def _run_tool(self, query: AnyStr) -> str:
        """Return the LLM's text for `query`, or "<->" when there is none.

        In dummy mode the LLM is never called and the placeholder is returned.

        Raises:
            ToolException: if the LLM call raises ValueError.
        """
        placeholder = "<->"
        if self.dummy_mode:
            return placeholder

        try:
            response = self.llm(query)
        except ValueError:
            raise ToolException("LLM Tool call failed")

        # A falsy response also maps to the placeholder
        return response.text if response else placeholder


================================================
FILE: libs/kotaemon/kotaemon/agents/tools/mcp.py
================================================
"""MCP Tool for kotaemon agents.

Bridges the MCP SDK's tool schema with kotaemon's BaseTool abstraction
so MCP tools can be seamlessly used by ReAct/ReWOO agents.

This module contains:
- MCPTool: BaseTool wrapper for individual MCP server tools
- Tool discovery/creation functions for building MCPTool instances from config
- Config parsing utilities
"""

import asyncio
import json
import logging
import shlex
from typing import Any, Optional, Type

from pydantic import BaseModel, Field, create_model

from .base import BaseTool

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# JSON Schema → Pydantic helpers
# ---------------------------------------------------------------------------


def _json_schema_type_to_python(json_type: Any) -> type:
    """Map a JSON Schema ``type`` value to the closest Python type.

    Args:
        json_type: the schema's ``type`` keyword. Usually a string such as
            ``"integer"``, but JSON Schema also allows a list of type names
            (e.g. ``["string", "null"]``).

    Returns:
        The matching Python type. Unknown, missing or malformed values fall
        back to ``str``. For a list, the first non-``"null"`` entry decides.
    """
    mapping: dict[str, type] = {
        "string": str,
        "integer": int,
        "number": float,
        "boolean": bool,
        "object": dict,
        "array": list,
    }
    if isinstance(json_type, (list, tuple)):
        # Union form: nullability is handled by the caller, so skip "null"
        json_type = next((t for t in json_type if t != "null"), "string")
    if not isinstance(json_type, str):
        # Unhashable or otherwise unexpected values must not crash the lookup
        return str
    return mapping.get(json_type, str)


def build_args_model(tool_name: str, input_schema: dict) -> Type[BaseModel]:
    """Create the Pydantic argument model for an MCP tool.

    Each entry under ``properties`` in `input_schema` becomes one model field.
    Names listed under ``required`` get a mandatory field; all others become
    ``Optional`` with the schema's ``default`` (``None`` when absent).

    Args:
        tool_name: name of the tool, used to name the model ``MCPArgs_<name>``.
        input_schema: the tool's JSON Schema dict.

    Returns:
        The dynamically created model class.
    """
    required_names = set(input_schema.get("required", []))
    field_defs: dict[str, Any] = {}

    for name, spec in input_schema.get("properties", {}).items():
        py_type = _json_schema_type_to_python(spec.get("type", "string"))
        text = spec.get("description", "")

        if name not in required_names:
            field_defs[name] = (
                Optional[py_type],
                Field(default=spec.get("default", None), description=text),
            )
            continue

        field_defs[name] = (py_type, Field(..., description=text))

    return create_model(f"MCPArgs_{tool_name}", **field_defs)


# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------


def parse_mcp_config(config: dict) -> dict:
    """Parse a JSON config into normalised transport/command/args/env.

    Handles the case where the user puts the full command string
    (e.g. ``"npx -y mcp-remote https://..."`` ) into the command field.

    Args:
        config: MCP server config. Recognised keys are ``transport``
            (defaults to ``"stdio"``), ``command``, ``args``, ``env`` and
            ``url``.

    Returns:
        A dict with keys: transport, command, args, env. For any transport
        other than ``"stdio"`` the ``command`` entry carries the ``url`` value.
    """
    transport = config.get("transport", "stdio")
    command = config.get("command", "")
    args = config.get("args", [])
    env = config.get("env", {})
    url = config.get("url", "")

    # If stdio and args is empty but command has spaces, split it
    if transport == "stdio" and not args and " " in command:
        parts = shlex.split(command)
        if parts:
            command, args = parts[0], parts[1:]
        else:
            # Whitespace-only command: there is nothing to split, and indexing
            # the empty result would raise IndexError
            command = ""

    return {
        "transport": transport,
        "command": command if transport == "stdio" else url,
        "args": args,
        "env": env,
    }


# ---------------------------------------------------------------------------
# Tool discovery & creation
# ---------------------------------------------------------------------------


def _make_tool(parsed: dict, tool_info: Any) -> "MCPTool":
    """Wrap one tool description returned by an MCP server in an MCPTool.

    Args:
        parsed: normalised server config with transport/command/args/env keys.
        tool_info: tool object exposing ``name``, ``description`` and,
            optionally, ``inputSchema``.
    """
    tool_name = tool_info.name
    schema = getattr(tool_info, "inputSchema", {})

    # Without a (non-empty) schema the tool gets no argument model
    args_model = None
    if schema:
        args_model = build_args_model(tool_name, schema)

    return MCPTool(
        name=tool_name,
        description=tool_info.description or f"MCP tool: {tool_name}",
        args_schema=args_model,
        server_transport=parsed["transport"],
        server_command=parsed["command"],
        server_args=parsed.get("args", []),
        server_env=parsed.get("env", {}),
        mcp_tool_name=tool_name,
    )


async def _async_discover_tools(parsed: dict) -> list["MCPTool"]:
    """Open a session to the configured MCP server and wrap every listed tool.

    Only the "stdio" and "sse" transports are handled; for any other value an
    empty list is returned.
    """
    from mcp import ClientSession
    from mcp.client.sse import sse_client
    from mcp.client.stdio import StdioServerParameters, stdio_client

    kind = parsed["transport"]
    found: list[MCPTool] = []

    if kind == "stdio":
        params = StdioServerParameters(
            command=parsed["command"],
            args=parsed.get("args", []),
            env=parsed.get("env") or None,
        )
        async with stdio_client(params) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                listing = await session.list_tools()
                found.extend(_make_tool(parsed, info) for info in listing.tools)
    elif kind == "sse":
        # For SSE the "command" entry holds the server URL
        async with sse_client(url=parsed["command"]) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                listing = await session.list_tools()
                found.extend(_make_tool(parsed, info) for info in listing.tools)

    return found


def _run_async(coro: Any) -> Any:
    """Run an async coroutine from a sync context, handling event loops.

    When no event loop is running in the current thread the coroutine is
    executed with ``asyncio.run``. When a loop is already running (the caller
    is itself inside async code) the coroutine is executed with
    ``asyncio.run`` on a short-lived worker thread and this call blocks until
    it finishes.

    Args:
        coro: the coroutine object to execute.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the coroutine, unchanged.
    """
    # Only the loop *detection* is guarded. Wrapping the execution itself in
    # ``except RuntimeError`` would re-run an already awaited coroutine and
    # hide the coroutine's own RuntimeError behind "cannot reuse already
    # awaited coroutine".
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if not loop_is_running:
        return asyncio.run(coro)

    import concurrent.futures

    # A running loop cannot be re-entered from sync code, so use a fresh loop
    # on another thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


def create_tools_from_config(
    config: dict,
    enabled_tools: Optional[list[str]] = None,
) -> list["MCPTool"]:
    """Discover the tools of an MCP server and wrap them as MCPTool objects.

    Args:
        config: MCP server JSON config with keys like transport, command, etc.
        enabled_tools: names of the tools to keep. ``None`` or an empty list
            keeps every discovered tool.

    Returns:
        List of MCPTool instances ready for use by agents.
    """
    discovered = _run_async(_async_discover_tools(parse_mcp_config(config)))

    if not enabled_tools:
        return discovered

    return [tool for tool in discovered if tool.mcp_tool_name in enabled_tools]


async def async_discover_tools_info(config: dict) -> list[dict]:
    """List the tools of an MCP server as plain dicts.

    Each returned dict has the keys ``name`` and ``description`` (an empty
    string when the server gives none). Handy for UI display, since no
    MCPTool object is built. Transports other than "stdio" and "sse" yield an
    empty list.
    """
    from mcp import ClientSession
    from mcp.client.sse import sse_client
    from mcp.client.stdio import StdioServerParameters, stdio_client

    def summarise(tool: Any) -> dict:
        return {"name": tool.name, "description": tool.description or ""}

    parsed = parse_mcp_config(config)
    kind = parsed["transport"]

    if kind == "stdio":
        params = StdioServerParameters(
            command=parsed["command"],
            args=parsed.get("args", []),
            env=parsed.get("env") or None,
        )
        async with stdio_client(params) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                listing = await session.list_tools()
                return [summarise(tool) for tool in listing.tools]

    if kind == "sse":
        # For SSE the "command" entry holds the server URL
        async with sse_client(url=parsed["command"]) as (reader, writer):
            async with ClientSession(reader, writer) as session:
                await session.initialize()
                listing = await session.list_tools()
                return [summarise(tool) for tool in listing.tools]

    return []


def discover_tools_info(config: dict) -> list[dict]:
    """Sync wrapper around async_discover_tools_info.

    Args:
        config: MCP server config dict, passed through unchanged.

    Returns:
        The list of tool info dicts produced by `async_discover_tools_info`.
    """
    # `_run_async` drives the coroutine to completion from synchronous code
    return _run_async(async_discover_tools_info(config))


def format_tool_list(
    tool_infos: list[dict],
    enabled_tools: Optional[list[str]] = None,
) -> str:
    """Format tool info dicts into a readable HTML string.

    Tool names and descriptions come from the MCP server, so they are
    HTML-escaped before being embedded in the markup.

    Args:
        tool_infos: List of dicts with 'name' and 'description' keys.
        enabled_tools: If provided and non-empty, marks which tools are
            enabled. ``None`` or an empty list means no filter is active, the
            same way ``create_tools_from_config`` interprets it, so every tool
            is reported as enabled.

    Returns:
        An HTML fragment: a header line, one line per tool (description cut to
        120 characters) and a footer describing the enabled state.
    """
    from html import escape

    # Build the lookup once instead of scanning the list for every tool
    enabled = set(enabled_tools) if enabled_tools else None

    lines = [f"✅ Connected! Found <b>{len(tool_infos)}</b> tool(s):<br>"]
    enabled_count = 0
    for info in tool_infos:
        raw_name = info["name"]
        name = escape(str(raw_name))
        # Truncate first so an escaped entity is never cut in half
        desc = escape((info.get("description") or "No description")[:120])

        if enabled is None:
            marker = "•"
        elif raw_name in enabled:
            marker = "✅"
            enabled_count += 1
        else:
            marker = "⬜"
        lines.append(f"&nbsp;&nbsp;{marker} <b>{name}</b> — {desc}<br>")

    if enabled is not None:
        lines.append(
            f"<br><i>{enabled_count}/{len(tool_infos)} tool(s) enabled. "
            'Add <code>"enabled_tools": ["tool_name", ...]</code> '
            "to your config JSON to limit tools.</i>"
        )
    else:
        lines.append(
            "<br><i>All tools enabled. Add "
            '<code>"enabled_tools": ["tool_name", ...]</code> '
            "to your config JSON to limit tools.</i>"
        )
    return "".join(lines)


# ---------------------------------------------------------------------------
# MCPTool class
# ---------------------------------------------------------------------------


class MCPTool(BaseTool):
    """A kotaemon BaseTool wrapper around a single MCP server tool.

    This tool holds the MCP server configuration and establishes
    a connection to invoke the tool on demand.

    Example usage::

        tool = MCPTool(
            name="search",
            description="Search the web",
            server_transport="stdio",
            server_command="uvx",
            server_args=["mcp-server-fetch"],
            mcp_tool_name="fetch",
        )
        result = tool.run("https://example.com")
    """

    name: str = ""
    description: str = ""
    args_schema: Optional[Type[BaseModel]] = None

    # MCP server connection details
    server_transport: str = "stdio"
    server_command: str = ""
    server_args: list[str] = []
    server_env: dict[str, str] = {}

    # The original MCP tool name (on the server)
    mcp_tool_name: str = ""

    def _run_tool(self, *args: Any, **kwargs: Any) -> str:
        """Invoke the MCP tool by establishing a session."""
        return _run_async(self._arun_tool(*args, **kwargs))

    def _build_tool_args(self, args: tuple, kwargs: dict) -> dict:
        """Turn the positional/keyword call arguments into the MCP argument dict.

        * No leading string argument: the keyword arguments are used as is.
        * Leading string holding a JSON object: the decoded object is used.
        * Any other leading string: it is passed as the value of the first
          field of ``args_schema``, or of ``"input"`` when there is no schema
          or the schema declares no field.
        """
        if not (args and isinstance(args[0], str)):
            return kwargs

        raw = args[0]
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            decoded = None

        # Inputs such as "2024", "true" or "[1, 2]" are valid JSON but not an
        # argument mapping; they must be forwarded as the plain string instead.
        if isinstance(decoded, dict):
            return decoded

        field_names = list(self.args_schema.model_fields) if self.args_schema else []
        target = field_names[0] if field_names else "input"
        return {target: raw}

    async def _arun_tool(self, *args: Any, **kwargs: Any) -> str:
        """Async implementation that connects to the MCP server and calls
        the tool.

        Returns:
            The formatted tool result, or an "Unsupported transport" message
            when ``server_transport`` is neither "stdio" nor "sse".
        """
        from mcp import ClientSession
        from mcp.client.sse import sse_client
        from mcp.client.stdio import StdioServerParameters, stdio_client

        tool_args = self._build_tool_args(args, kwargs)

        if self.server_transport == "stdio":
            cmd = self.server_command
            cmd_args = self.server_args
            # Auto-split if full command string with no separate args
            if not cmd_args and " " in cmd:
                parts = shlex.split(cmd)
                # A whitespace-only command splits into nothing; leave it as is
                # rather than failing with IndexError
                if parts:
                    cmd, cmd_args = parts[0], parts[1:]

            server_params = StdioServerParameters(
                command=cmd,
                args=cmd_args,
                env=self.server_env if self.server_env else None,
            )
            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()
                    result = await session.call_tool(self.mcp_tool_name, tool_args)
                    return self._format_result(result)
        elif self.server_transport == "sse":
            # For SSE, `server_command` holds the server URL
            async with sse_client(url=self.server_command) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()
                    result = await session.call_tool(self.mcp_tool_name, tool_args)
                    return self._format_result(result)
        else:
            return f"Unsupported transport: {self.server_transport}"

    def _format_result(self, result: Any) -> str:
        """Format MCP CallToolResult into a string.

        Error results are rendered as "MCP Tool Error: <content>". Otherwise
        each content item contributes one line: its ``text`` when present, a
        "[Binary data: <mimeType>]" marker when it has ``data``, or its
        ``str()`` form.
        """
        if result.isError:
            return f"MCP Tool Error: {result.content}"

        parts = []
        for content in result.content:
            if hasattr(content, "text"):
                parts.append(content.text)
            elif hasattr(content, "data"):
                parts.append(f"[Binary data: {content.mimeType}]")
            else:
                parts.append(str(content))
        return "\n".join(parts)


================================================
FILE: libs/kotaemon/kotaemon/agents/tools/wikipedia.py
================================================
from typing import Any, AnyStr, Optional, Type, Union

from pydantic import BaseModel, Field

from kotaemon.base import Document

from .base import BaseTool


class Wiki:
    """Wrapper around wikipedia API."""

    def __init__(self) -> None:
        """Check that wikipedia package is installed.

        Raises:
            ValueError: if the `wikipedia` package cannot be imported.
        """
        try:
            import wikipedia  # noqa: F401
        except ImportError as exc:
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from exc

    def search(self, search: str) -> Union[str, Document]:
        """Try to search for wiki page.

        Args:
            search: title or query to look up.

        Returns:
            A Document holding the page content, with the page URL stored
            under the ``page`` metadata key, when the page is found. If the
            page is missing or the title is ambiguous, a message string
            listing similar entries.
        """
        import wikipedia

        try:
            # Resolve the page once and reuse it for both content and URL
            page = wikipedia.page(search)
            result: Union[str, Document] = Document(
                text=page.content, metadata={"page": page.url}
            )
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            result = f"Could not find [{search}]. Similar: {wikipedia.search(search)}"
        return result


# Argument schema used by `WikipediaTool`: one required `query` string.
class WikipediaArgs(BaseModel):
    # The description is exposed through the tool's argument schema, so the
    # product name must be spelled correctly.
    query: str = Field(..., description="a search query as input to Wikipedia")


class WikipediaTool(BaseTool):
    """Tool that looks a query up on Wikipedia through a `Wiki` wrapper."""

    name: str = "wikipedia"
    description: str = (
        "Search engine from Wikipedia, retrieving relevant wiki page. "
        "Useful when you need to get holistic knowledge about people, "
        "places, companies, historical events, or other subjects. "
        "Input should be a search query."
    )
    args_schema: Optional[Type[BaseModel]] = WikipediaArgs
    doc_store: Any = None

    def _run_tool(self, query: AnyStr) -> AnyStr:
        """Search `query` with `doc_store`, creating a `Wiki` on first use."""
        if not self.doc_store:
            self.doc_store = Wiki()
        return self.doc_store.search(query)


================================================
FILE: libs/kotaemon/kotaemon/agents/utils.py
================================================
from kotaemon.base import Document


def get_plugin_response_content(output) -> str:
    """Return the text of a Document, or the string form of anything else."""
    return output.text if isinstance(output, Document) else str(output)


def calculate_cost(model_name: str, prompt_token: int, completion_token: int) -> float:
    """
    Calculate the cost of a prompt and completion.

    Not implemented yet: the arguments are currently ignored and 0.0 is
    always returned.

    Args:
        model_name: name of the model
        prompt_token: token count of the prompt
        completion_token: token count of the completion

    Returns:
        float: Cost of the provided model name with provided token information
    """
    # TODO: to be implemented
    return 0.0


================================================
FILE: libs/kotaemon/kotaemon/base/__init__.py
================================================
from .component import BaseComponent, Node, Param, lazy
from .schema import (
    AIMessage,
    BaseMessage,
    Document,
    DocumentWithEmbedding,
    ExtractorOutput,
    HumanMessage,
    LLMInterface,
    RetrievedDocument,
    StructuredOutputLLMInterface,
    SystemMessage,
)

# Names re-exported by this package; all of them are imported above from
# `.component` and `.schema`.
__all__ = [
    "BaseComponent",
    "Document",
    "DocumentWithEmbedding",
    "BaseMessage",
    "SystemMessage",
    "AIMessage",
    "HumanMessage",
    "RetrievedDocument",
    "LLMInterface",
    "StructuredOutputLLMInterface",
    "ExtractorOutput",
    "Param",
    "Node",
    "lazy",
]


================================================
FILE: libs/kotaemon/kotaemon/base/component.py
================================================
from abc import abstractmethod
from typing import Any, AsyncGenerator, Iterator, Optional

from theflow import Function, Node, Param, lazy

from kotaemon.base.schema import Document


class BaseComponent(Function):
    """A component is a class that can be used to compose a pipeline.

    !!! tip "Benefits of component"
        - Auto caching, logging
        - Allow deployment

    !!! tip "For each component, the spirit is"
        - Tolerate multiple input types, e.g. str, Document, List[str], List[Document]
        - Enforce single output type. Hence, the output type of a component should be
    as generic as possible.
    """

    inflow = None

    def flow(self):
        """Run this component on the result of its upstream `inflow`.

        Raises:
            ValueError: if `inflow` is unset or is not a BaseComponent.
        """
        upstream = self.inflow
        if upstream is None:
            raise ValueError("No inflow provided.")
        if not isinstance(upstream, BaseComponent):
            raise ValueError(
                f"inflow must be a BaseComponent, found {type(upstream)}"
            )

        return self.__call__(upstream.flow())

    def set_output_queue(self, queue):
        """Attach `queue` to this component and to every BaseComponent node."""
        self._queue = queue
        children = (getattr(self, node_name) for node_name in self._ff_nodes)
        for child in children:
            if isinstance(child, BaseComponent):
                child.set_output_queue(queue)

    def report_output(self, output: Optional[Document]):
        """Push `output` onto the attached queue without blocking, if any."""
        if self._queue is None:
            return
        self._queue.put_nowait(output)

    def invoke(self, *args, **kwargs) -> Document | list[Document] | None:
        """Placeholder with an empty body in this base class."""
        ...

    async def ainvoke(self, *args, **kwargs) -> Document | list[Document] | None:
        """Async placeholder with an empty body in this base class."""
        ...

    def stream(self, *args, **kwargs) -> Iterator[Document] | None:
        """Streaming placeholder with an empty body in this base class."""
        ...

    def astream(self, *args, **kwargs) -> AsyncGenerator[Document, None] | None:
        """Async streaming placeholder with an empty body in this base class."""
        ...

    @abstractmethod
    def run(
        self, *args, **kwargs
    ) -> Document | list[Document] | Iterator[Document] | None | Any:
        """Run the component."""
        ...


# Public names of this module: the component base class plus the `theflow`
# helpers imported above.
__all__ = ["BaseComponent", "Param", "Node", "lazy"]


================================================
FILE: libs/kotaemon/kotaemon/base/schema.py
================================================
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal, Optional, TypeVar

from langchain.schema.messages import AIMessage as LCAIMessage
from langchain.schema.messages import HumanMessage as LCHumanMessage
from langchain.schema.messages import SystemMessage as LCSystemMessage
from llama_index.core.bridge.pydantic import Field
from llama_index.core.schema import Document as BaseDocument

if TYPE_CHECKING:
    from haystack.schema import Document as HaystackDocument
    from openai.types.chat.chat_completion_message_param import (
        ChatCompletionMessageParam,
    )

# Type variable constrained to either `Document` or `str`
IO_Type = TypeVar("IO_Type", "Document", str)
# Text used for the document returned by `Document.example()`
SAMPLE_TEXT = "A sample Document from kotaemon"


class Document(BaseDocument):
    """
    Base document class, built on top of the llama-index Document class.

    The constructor takes one optional positional argument `content` of any
    type, which stores the raw content of the document. When given, it is also
    used to fill the `text` of the underlying llama_index class.

    Attributes:
        content: raw content of the document, can be anything
        source: id of the source of the Document. Optional.
        channel: the channel to show the document. Optional.:
            - chat: show in chat message
            - info: show in information panel
            - index: show in index panel
            - debug: show in debug panel
            - plot: also accepted by the type annotation
    """

    content: Any = None
    source: Optional[str] = None
    channel: Optional[Literal["chat", "info", "index", "debug", "plot"]] = None

    def __init__(self, content: Optional[Any] = None, *args, **kwargs):
        if isinstance(content, Document):
            # Copy constructor: start from the other document's fields and let
            # explicit keyword arguments override them
            # TODO: simplify the Document class
            merged = content.dict()
            merged.update(kwargs)
            kwargs = merged
        elif content is not None:
            kwargs["content"] = content
            kwargs["text"] = str(content) if content else ""
        elif kwargs.get("text", None) is not None:
            kwargs["content"] = kwargs["text"]
        elif kwargs.get("embedding", None) is not None:
            kwargs["content"] = kwargs["embedding"]
            # default text indicating this document only contains embedding
            kwargs["text"] = "<EMBEDDING>"
        super().__init__(*args, **kwargs)

    def __bool__(self):
        # Truthiness follows the raw content
        return bool(self.content)

    @classmethod
    def example(cls) -> "Document":
        sample = Document(
            text=SAMPLE_TEXT,
            metadata={"filename": "README.md", "category": "codebase"},
        )
        return sample

    def to_haystack_format(self) -> "HaystackDocument":
        """Convert struct to Haystack document format."""
        from haystack.schema import Document as HaystackDocument

        return HaystackDocument(content=self.text, meta=self.metadata or {})

    def __str__(self):
        return str(self.content)


class DocumentWithEmbedding(Document):
    """Subclass of Document which must contain an embedding

    Use this to require that a component's inputs/outputs carry an embedding.
    """

    def __init__(self, embedding: list[float], *args, **kwargs):
        super().__init__(*args, embedding=embedding, **kwargs)


# Base class of the message types defined below. Both methods raise
# NotImplementedError here.
class BaseMessage(Document):
    def __add__(self, other: Any):
        # `+` is not supported on the base class
        raise NotImplementedError

    def to_openai_format(self) -> "ChatCompletionMessageParam":
        """Return the message as an OpenAI chat message; see the subclasses."""
        raise NotImplementedError


# Message combining `BaseMessage` with the langchain system message class.
class SystemMessage(BaseMessage, LCSystemMessage):
    def to_openai_format(self) -> "ChatCompletionMessageParam":
        """Return a dict with role "system" and this message's content."""
        return {"role": "system", "content": self.content}


# Message combining `BaseMessage` with the langchain AI message class.
class AIMessage(BaseMessage, LCAIMessage):
    def to_openai_format(self) -> "ChatCompletionMessageParam":
        """Return a dict with role "assistant" and this message's content."""
        return {"role": "assistant", "content": self.content}


# Message combining `BaseMessage` with the langchain human message class.
class HumanMessage(BaseMessage, LCHumanMessage):
    def to_openai_format(self) -> "ChatCompletionMessageParam":
        """Return a dict with role "user" and this message's content."""
        return {"role": "user", "content": self.content}


class RetrievedDocument(Document):
    """Subclass of Document with retrieval-related information

    Attributes:
        score (float): score of the document (from 0.0 to 1.0)
        retrieval_metadata (dict): metadata from the retrieval process, can be used
            by different components in a retrieved pipeline to communicate with each
            other
    """

    # Defaults to 0.0 when no score is supplied
    score: float = Field(default=0.0)
    # Defaults to an empty dict
    retrieval_metadata: dict = Field(default={})


# AIMessage extended with extra fields describing an LLM response.
class LLMInterface(AIMessage):
    candidates: list[str] = Field(default_factory=list)
    # Token counters default to -1
    # NOTE(review): -1 presumably means "not reported" -- confirm with producers
    completion_tokens: int = -1
    total_tokens: int = -1
    prompt_tokens: int = -1
    total_cost: float = 0
    logits: list[list[float]] = Field(default_factory=list)
    messages: list[AIMessage] = Field(default_factory=list)
    logprobs: list[float] = []


# LLMInterface with two additional fields: `parsed` and `refusal`.
class StructuredOutputLLMInterface(LLMInterface):
    # No default is declared for `parsed`
    parsed: Any
    # Empty string by default
    refusal: str = ""


class ExtractorOutput(Document):
    """
    Represents the output of an extractor.
    """

    # List of matched strings; no default is declared
    matches: list[str]


================================================
FILE: libs/kotaemon/kotaemon/chatbot/__init__.py
================================================
from .base import BaseChatBot, ChatConversation
from .simple_respondent import SimpleRespondentChatbot

# Names re-exported by this package, all imported above.
__all__ = ["BaseChatBot", "SimpleRespondentChatbot", "ChatConversation"]


================================================
FILE: libs/kotaemon/kotaemon/chatbot/base.py
================================================
from abc import abstractmethod
from typing import List, Optional

from theflow import SessionFunction

from kotaemon.base import BaseComponent, LLMInterface
from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage


class BaseChatBot(BaseComponent):
    """Abstract chat bot component: subclasses must implement `run`."""

    @abstractmethod
    def run(self, messages: List[BaseMessage]) -> LLMInterface:
        """Produce a response for the given list of messages."""
        ...


def session_chat_storage(obj):
    """Store using the bot location rather than the session location

    Returns `obj._store_result`. `ChatConversation` uses this function as its
    `Config.store_result`.
    """
    return obj._store_result


class ChatConversation(SessionFunction):
    """Base implementation of a chat bot component

    A chatbot component should:
        - handle internal state, including history messages
        - return output for a given input

    The conversation keeps the message history in `_history` and delegates
    the generation of each reply to `bot`.
    """

    class Config:
        # Resolve the storage location through `session_chat_storage`, which
        # returns the object's `_store_result`
        store_result = session_chat_storage

    # Text of the system message added at session start (skipped when empty)
    system_message: str = ""
    # The chat bot called with the full history to produce each reply
    bot: BaseChatBot

    def __init__(self, *args, **kwargs):
        """Set up an empty history and a placeholder storage location.

        Both attributes are assigned before `super().__init__` is called.
        """
        self._history: List[BaseMessage] = []
        # Placeholder value; replaced in `start_session`
        self._store_result = (
            f"{self.__module__}.{self.__class__.__name__},uninitiated_bot"
        )
        super().__init__(*args, **kwargs)

    def run(self, message: HumanMessage) -> Optional[BaseMessage]:
        """Chat, given a message, return a response

        Args:
            message: The message to respond to. A plain `str` is also accepted
                and wrapped in a `HumanMessage`.

        Returns:
            The response to the message. If None, no response is sent.
        """
        user_message = (
            HumanMessage(content=message) if isinstance(message, str) else message
        )
        self.history.append(user_message)

        # The bot receives the whole history, including the new user message
        output = self.bot(self.history).text
        output_message = None
        if output is not None:
            output_message = AIMessage(content=output)
            self.history.append(output_message)

        return output_message

    def start_session(self):
        """Start a session and seed the history with the system message.

        The storage location is taken from the bot's config before the parent
        `start_session` runs. The system message is only added when the
        history is empty and `system_message` is non-empty.
        """
        self._store_result = self.bot.config.store_result
        super().start_session()
        if not self.history and self.system_message:
            system_message = SystemMessage(content=self.system_message)
            self.history.append(system_message)

    def end_session(self):
        """End the session, then reset the history to an empty list."""
        super().end_session()
        self._history = []

    def check_end(
        self,
        history: Optional[List[BaseMessage]] = None,
        user_message: Optional[HumanMessage] = None,
        bot_message: Optional[AIMessage] = None,
    ) -> bool:
        """Check if a conversation should end

        Returns True only when `user_message` is given and its content is an
        empty string. `history` and `bot_message` are not used by this
        implementation.
        """
        if user_message is not None and user_message.content == "":
            return True

        return False

    def terminal_session(self):
        """Create a terminal session

        Reads user input in a loop and prints each reply until `check_end`
        returns True, then ends the session.
        """
        self.start_session()
        print(">> Start chat:")

        while True:
            human = HumanMessage(content=input("Human: "))
            # Checked before the bot is called
            if self.check_end(history=self.history, user_message=human):
                break

            output = self(human)
            if output is None:
                print("AI: <No response>")
            else:
                print("AI:", output.content)

            # Checked again with the bot's reply
            if self.check_end(history=self.history, bot_message=output):
                break

        self.end_session()

    @property
    def history(self):
        """The list of messages exchanged so far."""
        return self._history

    @history.setter
    def history(self, value):
        self._history = value
        # NOTE(review): `_variablex` is not defined in this class; presumably
        # it is provided by the `theflow` base class -- confirm
        self._variablex()


================================================
FILE: libs/kotaemon/kotaemon/chatbot/simple_respondent.py
================================================
from ..llms import ChatLLM
from .base import BaseChatBot


class SimpleRespondentChatbot(BaseChatBot):
    """Simple text respondent chatbot that essentially wraps around a chat LLM"""

    # NOTE(review): `BaseChatBot.run` is abstract and no `run` is defined
    # here -- confirm whether an implementation is expected in this class.

    # The chat LLM that generates the reply
    llm: ChatLLM

    def _get_message(self) -> str:
        """Return the text of the LLM's response to `self.history`."""
        # NOTE(review): `history` is not defined in this class or in
        # `BaseChatBot`; verify where it is expected to come from
        return self.llm(self.history).text


================================================
FILE: libs/kotaemon/kotaemon/cli.py
================================================
import os

import click
import yaml
from trogon import tui


def check_config_format(config):
    """Check that an existing config path points to loadable YAML.

    Nothing happens when `config` does not exist. An existing path must be
    given as a `str`; it is then opened and parsed with `yaml.safe_load`.

    Raises:
        ValueError: if the path exists but `config` is not a `str`.
    """
    if not os.path.exists(config):
        return

    if not isinstance(config, str):
        raise ValueError("config must be yaml format.")

    with open(config) as f:
        yaml.safe_load(f)


# Root click command group. No docstring is added on purpose: click would show
# it as the group's help text.
@tui(command="ui", help="Open the terminal UI")  # generate the terminal UI
@click.group()
def main():
    pass


# Command group holding the promptui sub-commands (`export`, `run`).
@click.group()
def promptui():
    pass


# Register the `promptui` group under the root `main` group
main.add_command(promptui)


@promptui.command()
@click.argument("export_path", nargs=1)
@click.option("--output", default="promptui.yml", show_default=True, required=False)
def export(export_path, output):
    """Export a pipeline to a config file"""
    # These imports run when the command is invoked, not at module import time
    import sys

    from theflow.utils.modules import import_dotted_string

    from kotaemon.contribs.promptui.config import export_pipeline_to_config

    # Add the current working directory to sys.path before resolving
    # `export_path`
    sys.path.append(os.getcwd())
    cls = import_dotted_string(export_path, safe=False)
    export_pipeline_to_config(cls, output)
    # Check the config file that was just written
    check_config_format(output)


@promptui.command()
@click.argument("run_path", required=False, default="promptui.yml")
@click.option(
    "--share",
    is_flag=True,
    show_default=True,
    default=False,
    help="Share the app through Gradio. Requires --username to enable authentication.",
)
@click.option(
    "--username",
    required=False,
    help=(
        "Username for the user. If not provided, the promptui will not have "
        "authentication."
    ),
)
@click.option(
    "--password",
    required=False,
    help="Password for the user. If not provided, will be prompted.",
)
@click.option(
    "--appname",
    required=False,
    help="The share app subdomain. Requires --share and --username",
)
@click.option(
    "--port",
    required=False,
    help="Port to run the app. If not provided, will use $GRADIO_SERVER_PORT (7860)",
)
def run(run_path, share, username, password, appname, port):
    """Run the UI from a config file

    Examples:

        \b
        # Run with default config file
        $ kh promptui run

        \b
        # Run with username and password supplied
        $ kh promptui run --username admin --password password

        \b
        # Run with username and prompted password
        $ kh promptui run --username admin

        \b
        # Run and share to promptui
        $ kh promptui run --username admin --password password --share \\
            --appname hey --port 7861
    """
    # NOTE: the docstring above is the command's help text. Each example starts with
    # a `\b` marker so that click keeps its line breaks instead of re-wrapping it.

    # Reject an invalid flag combination before doing any expensive work (importing
    # gradio, building the UI): sharing is only allowed with authentication.
    if share and username is None:
        raise ValueError(
            "Username must be provided to enable authentication for sharing"
        )

    import sys

    from kotaemon.contribs.promptui.ui import build_from_dict

    # Make modules in the current working directory importable, so the pipelines
    # referenced by the config can be resolved
    sys.path.append(os.getcwd())

    check_config_format(run_path)
    demo = build_from_dict(run_path)

    # Keyword arguments that are forwarded to `demo.launch`
    params: dict = {}
    if username is not None:
        if password is None:
            # No password on the command line: ask for it without echoing
            password = click.prompt("Password", hide_input=True)
        params["auth"] = (username, password)

    # Explicit --port wins, then $GRADIO_SERVER_PORT, then 7860
    port = int(port) if port else int(os.getenv("GRADIO_SERVER_PORT", "7860"))
    params["server_port"] = port

    if share:
        if appname:
            # A named app is exposed through the project's own tunnel ...
            from kotaemon.contribs.promptui.tunnel import Tunnel

            tunnel = Tunnel(
                appname=str(appname), username=str(username), local_port=port
            )
            url = tunnel.run()
            print(f"App is shared at {url}")
        else:
            # ... otherwise fall back to Gradio's built-in sharing
            params["share"] = True
            print("App is shared at Gradio")

    demo.launch(**params)


@main.command()
@click.argument("module", required=True)
@click.option(
    "--output", default="docs.md", required=False, help="The output markdown file"
)
@click.option(
    "--separation-level", required=False, default=1, help="Organize markdown layout"
)
def makedoc(module, output, separation_level):
    """Make documentation for module `module`

    Example:

        \b
        # Make component documentation for kotaemon library
        $ kh makedoc kotaemon
    """
    # Imported inside the command body rather than at module import time
    from kotaemon.contribs.docs import make_doc

    # All three CLI values are forwarded unchanged; the markdown is written to
    # `output` by `make_doc`
    make_doc(module, output, separation_level)
    print(f"Documentation exported to {output}")


@main.command()
@click.option(
    "--template",
    default="project-default",
    required=False,
    help="Template name",
    show_default=True,
)
def start_project(template):
    """Start a project from a template.

    Important: the value for --template corresponds to the name of the template folder,
    which is located at https://github.com/Cinnamon/kotaemon/tree/main/templates
    The default value is "project-default", which should work when you are starting a
    client project.
    """
    import subprocess

    print("Retrieving template...")
    # Run cookiecutter with an argument list instead of a shell string: the
    # user-supplied `template` value is then passed verbatim as a single argument
    # and can never be interpreted by a shell (and no shell quoting is needed).
    command = [
        "cookiecutter",
        "git@github.com:Cinnamon/kotaemon.git",
        f"--directory=templates/{template}",
    ]
    try:
        # As before, the exit status of cookiecutter is not turned into an error
        subprocess.run(command, check=False)
    except FileNotFoundError:
        # Without a shell there is nobody to report a missing executable for us
        print("`cookiecutter` was not found. Please install it and try again.")


if __name__ == "__main__":
    main()


================================================
FILE: libs/kotaemon/kotaemon/contribs/__init__.py
================================================


================================================
FILE: libs/kotaemon/kotaemon/contribs/docs.py
================================================
import inspect
from collections import defaultdict

from theflow.utils.documentation import get_function_documentation_from_module


def from_definition_to_markdown(definition: dict) -> str:
    """Render one component definition as a markdown snippet

    The snippet is the cleaned-up description, followed by a "Params" table and a
    "Nodes" table. A table is replaced by "N/A" when the definition has no entry
    for it.

    Args:
        definition: dict with the keys "desc", "params" and "nodes"

    Returns:
        str: the markdown text
    """

    def class_name_or_text(obj) -> str:
        # Classes are shown by their bare name, anything else by its `str()`
        return obj.__name__ if inspect.isclass(obj) else str(obj)

    # Params table
    param_defs = definition["params"]
    if param_defs:
        rows = [
            "",
            "| Name | Description | Type | Default |",
            "| --- | --- | --- | --- |",
        ]
        for param_name, spec in param_defs.items():
            kind = spec["type"]
            if inspect.isclass(kind):
                kind = kind.__name__
            rows.append(
                f"| {param_name} | {spec['desc']} | {kind} | {spec['default']} |"
            )
        params_md = "\n".join(rows) + "\n"
    else:
        params_md = " N/A\n"

    # Nodes table
    node_defs = definition["nodes"]
    if node_defs:
        rows = [
            "",
            "| Name | Description | Type | Input | Output |",
            "| --- | --- | --- | --- | --- |",
        ]
        for node_name, spec in node_defs.items():
            kind = class_name_or_text(spec["type"])
            takes = class_name_or_text(spec["input"])
            gives = class_name_or_text(spec["output"])
            rows.append(f"|{node_name}|{spec['desc']}|{kind}|{takes}|{gives}|")
        nodes_md = "\n".join(rows) + "\n"
    else:
        nodes_md = " N/A\n"

    description = inspect.cleandoc(definition["desc"])
    return f"{description}\n\n_**Params:**_{params_md}\n_**Nodes:**_{nodes_md}"


def make_doc(module: str, output: str, separation_level: int):
    """Export the documentation of the components in a module to a markdown file

    The documented items are sorted by their dotted name and grouped into "##"
    sections; each item becomes a "###" entry inside its section.

    Args:
        module (str): module name
        output (str): path of the markdown file to write
        separation_level (int): index of the dotted-name component that is used
            (capitalized) as the section title of an item

    Raises:
        IndexError: a dotted name has no component at index `separation_level`
    """
    documentation = sorted(
        get_function_documentation_from_module(module).items(), key=lambda x: x[0]
    )

    # section title -> markdown entries, in (sorted) order of appearance
    entries = defaultdict(list)

    for name, definition in documentation:
        parts = name.split(".")
        section = parts[separation_level].capitalize()
        cls_name = parts[-1]

        markdown = from_definition_to_markdown(definition)
        entries[section].append(f"### {cls_name}\n{markdown}")

    final = "\n".join(
        f"## {section}\n" + "\n".join(items) for section, items in entries.items()
    )

    # Docstrings may contain non-ASCII characters: write UTF-8 explicitly rather
    # than relying on the platform's default encoding
    with open(output, "w", encoding="utf-8") as f:
        f.write(final)


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/.gitignore
================================================
/frpc_*


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/__init__.py
================================================


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/base.py
================================================
import gradio as gr

# Maps the component name used in a promptui config to the gradio class to build
COMPONENTS_CLASS = {
    "text": gr.components.Textbox,
    "checkbox": gr.components.CheckboxGroup,
    "dropdown": gr.components.Dropdown,
    "file": gr.components.File,
    "image": gr.components.Image,
    "number": gr.components.Number,
    "radio": gr.components.Radio,
    "slider": gr.components.Slider,
}
# Names that are accepted for the `component` key of a component definition
SUPPORTED_COMPONENTS = set(COMPONENTS_CLASS.keys())
# Fallback component name, keyed by the name of a Python type
# NOTE(review): "bool" falls back to "checkbox", which is a CheckboxGroup above --
# confirm that a multi-select group is really wanted for boolean values
DEFAULT_COMPONENT_BY_TYPES = {
    "str": "text",
    "bool": "checkbox",
    "int": "number",
    "float": "number",
    "list": "dropdown",
}


def get_component(component_def: dict) -> gr.components.Component:
    """Instantiate the gradio component described by `component_def`

    Args:
        component_def: dict with a mandatory "component" key (one of
            `SUPPORTED_COMPONENTS`) and an optional "params" dict that is passed
            as keyword arguments to the component class

    Returns:
        the created gradio component

    Raises:
        ValueError: "component" is missing or names an unsupported component
    """
    if "component" not in component_def:
        raise ValueError(
            f"Cannot decide the component from {component_def}. "
            "Please specify `component` with 1 of the following "
            f"values: {SUPPORTED_COMPONENTS}"
        )

    name = component_def["component"]
    if name not in SUPPORTED_COMPONENTS:
        raise ValueError(
            f"Unsupported UI component: {name}. "
            f"Must be one of {SUPPORTED_COMPONENTS}"
        )

    kwargs = component_def.get("params", {})
    return COMPONENTS_CLASS[name](**kwargs)


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/cli.py
================================================
"""CLI commands that can be imported by the kotaemon.cli module"""


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/config.py
================================================
"""Get config from Pipeline"""
import inspect
from pathlib import Path
from typing import Any, Dict, Optional, Type, Union

import yaml

from kotaemon.base import BaseComponent
from kotaemon.chatbot import BaseChatBot

from .base import DEFAULT_COMPONENT_BY_TYPES


def config_from_value(value: Any) -> dict:
    """Build a UI component config whose initial value is `value`

    The component is looked up from the type name of `value`; unknown types fall
    back to "text".

    Args:
        value (Any): default value

    Returns:
        dict: config with the keys "component" and "params"
    """
    type_name = type(value).__name__
    return {
        "component": DEFAULT_COMPONENT_BY_TYPES.get(type_name, "text"),
        "params": {"value": value},
    }


def handle_param(param: dict) -> dict:
    """Turn a param definition into a promptui-compliant component config

    A string default of the form "{{...}}" is treated as if there were no
    default. The component is `param["component_ui"]` when given, otherwise it is
    derived from the type of the default ("text" when nothing matches).

    Supported gradio's UI components are (https://www.gradio.app/docs/components)
        - CheckBoxGroup: list (multi select)
        - DropDown: list (single select)
        - File
        - Image
        - Number: int / float
        - Radio: list (single select)
        - Slider: int / float
        - TextBox: str
    """
    default = param.get("default", None)
    is_template = (
        isinstance(default, str)
        and default.startswith("{{")
        and default.endswith("}}")
    )
    if is_template:
        default = None

    params = {} if default is None else {"value": default}

    ui_component = param.get("component_ui", "")
    if not ui_component:
        type_name = "" if default is None else type(default).__name__
        ui_component = DEFAULT_COMPONENT_BY_TYPES.get(type_name, "text")

    return {"component": ui_component, "params": params}


def handle_node(node: dict) -> dict:
    """Turn a node definition into a flat promptui-compliant config

    Params of the node are stored under their own name. Params of nested nodes
    (and their `default_kwargs`) are stored under "<node name>.<key>". Entries
    whose `auto_callback` is a string, or that set `ignore_ui`, are left out.
    """

    def hidden(definition: dict) -> bool:
        # Not exposed in the UI: string `auto_callback`, or explicitly ignored
        if isinstance(definition["auto_callback"], str):
            return True
        return bool(definition.get("ignore_ui", False))

    config = {}

    for name, param_def in node.get("params", {}).items():
        if not hidden(param_def):
            config[name] = handle_param(param_def)

    for name, node_def in node.get("nodes", {}).items():
        if hidden(node_def):
            continue
        prefix = f"{name}."
        nested = handle_node(node_def["default"])
        config.update({prefix + key: value for key, value in nested.items()})
        defaults = node_def.get("default_kwargs", {})
        config.update(
            {prefix + key: config_from_value(value) for key, value in defaults.items()}
        )

    return config


def handle_input(pipeline: Union[BaseComponent, Type[BaseComponent]]) -> dict:
    """Describe the arguments of `pipeline.run` as UI input components

    `self`, `args` and `kwargs` are skipped. An argument without default becomes a
    plain "text" component. An argument with a default additionally carries that
    default as its value; its component is "text", or None if the default is None.
    """
    skipped = {"self", "args", "kwargs"}
    inputs: Dict[str, Dict] = {}

    for name, param in inspect.signature(pipeline.run).parameters.items():
        if name in skipped:
            continue

        if param.default is param.empty:
            inputs[name] = {"component": "text"}
            continue

        inputs[name] = {
            "component": None if param.default is None else "text",
            "params": {"value": param.default},
        }

    return inputs


def export_pipeline_to_config(
    pipeline: Union[BaseComponent, Type[BaseComponent]],
    path: Optional[str] = None,
) -> dict:
    """Export a pipeline to a promptui-compliant config dict

    Args:
        pipeline: pipeline instance, or pipeline class (instantiated without
            arguments)
        path: optional YAML file to write the config to. If the file already
            exists, its content is kept and only the entry of this pipeline is
            added or replaced.

    Returns:
        dict: `{"<module>.<class name>": <config of the pipeline>}`
    """
    if inspect.isclass(pipeline):
        pipeline = pipeline()

    pipeline_def = pipeline.describe()
    ui_type = "chat" if isinstance(pipeline, BaseChatBot) else "simple"

    # Log columns shared by both UI types
    full_pipeline_log: dict = {
        "input": {"step": ".", "getter": "_get_input"},
        "output": {"step": ".", "getter": "_get_output"},
    }

    if ui_type == "chat":
        params = {f".bot.{k}": v for k, v in handle_node(pipeline_def).items()}
        params["system_message"] = {"component": "text", "params": {"value": ""}}
        inputs: dict = {}
        default_outputs: list = []
        full_pipeline_log["preference"] = {"step": "preference"}
    else:
        params = handle_node(pipeline_def)
        inputs = handle_input(pipeline)
        default_outputs = [{"step": ".", "getter": "_get_output", "component": "text"}]

    # A pipeline can override the outputs through `_promptui_outputs`
    outputs = getattr(pipeline, "_promptui_outputs", default_outputs)

    # Key order matters: the dict is dumped with `sort_keys=False`
    config_obj: dict = {
        "ui-type": ui_type,
        "params": params,
        "inputs": inputs,
        "outputs": outputs,
        "logs": {"full_pipeline": full_pipeline_log},
    }

    config = {f"{pipeline.__module__}.{pipeline.__class__.__name__}": config_obj}
    if path is not None:
        merged = config
        if Path(path).is_file():
            with open(path) as f:
                loaded = yaml.safe_load(f)
            # `safe_load` returns None for an empty file; start from an empty
            # config in that case instead of failing on `None.update`
            merged = {} if loaded is None else loaded
            merged.update(config)
        with open(path, "w") as f:
            yaml.safe_dump(merged, f, sort_keys=False)

    return config


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/export.py
================================================
"""Export logs into Excel file"""
import os
import pickle
from pathlib import Path
from typing import Any, Dict, List, Type, Union

import pandas as pd
import yaml
from theflow.storage import storage
from theflow.utils.modules import import_dotted_string

from kotaemon.base import BaseComponent

from .logs import ResultLog


def from_log_to_dict(pipeline_cls: Type[BaseComponent], log_config: dict) -> dict:
    """Collect the logged runs of a pipeline into a column-oriented dict

    Every sub-directory of the pipeline's result store is treated as one run. For
    each run, the pickled params (`params.pkl`) and the pickled progress
    (`progress.pkl`) are read when present.

    Args:
        pipeline_cls (Type[BaseComponent]): Pipeline class
        log_config (dict): Log config, mapping each output column name to a dict
            with a required "step" key and an optional "getter" key

    Returns:
        dict: `{"ids": [...], <param name>: [...], <column name>: [...]}`. Every
            param/column list has one slot per run directory, which stays `None`
            when nothing was found for that run.
    """
    # get the run directories of the pipeline, sorted by path
    pipeline_log_path = storage.url(pipeline_cls().config.store_result)
    dirs = list(sorted([f.path for f in os.scandir(pipeline_log_path) if f.is_dir()]))

    # get resultlog callback: the pipeline can supply its own getter class,
    # any attribute that is not a dunder can be used as a getter
    resultlog = getattr(pipeline_cls, "_promptui_resultlog", ResultLog)
    allowed_resultlog_callbacks = {i for i in dir(resultlog) if not i.startswith("__")}

    ids = []
    params: Dict[str, List[Any]] = {}
    logged_infos: Dict[str, List[Any]] = {}

    for idx, each_dir in enumerate(dirs):
        # the directory name serves as the id of the run
        ids.append(str(Path(each_dir).name))

        # get the params
        # NOTE(review): `pickle.load` can execute arbitrary code from the file;
        # this is only safe as long as the log directory is trusted
        params_file = os.path.join(each_dir, "params.pkl")
        if os.path.exists(params_file):
            with open(params_file, "rb") as f:
                each_params = pickle.load(f)
            for key, value in each_params.items():
                # columns are created lazily, pre-filled with None for all runs
                if key not in params:
                    params[key] = [None] * len(dirs)
                params[key][idx] = value

        # get the progress
        progress_file = os.path.join(each_dir, "progress.pkl")
        if os.path.exists(progress_file):
            with open(progress_file, "rb") as f:
                progress = pickle.load(f)

            for name, col_info in log_config.items():
                step = col_info["step"]
                getter = col_info.get("getter", None)
                if name not in logged_infos:
                    logged_infos[name] = [None] * len(dirs)

                # this run did not log the requested step: leave the slot as None
                if step not in progress:
                    continue

                info = progress[step]
                if getter:
                    # explicit getter; an unknown getter name passes the raw
                    # step value through unchanged
                    if getter in allowed_resultlog_callbacks:
                        info = getattr(resultlog, getter)(info)
                else:
                    # no explicit getter: try `get_<column name>`, else pass through
                    implicit_name = f"get_{name}"
                    if implicit_name in allowed_resultlog_callbacks:
                        info = getattr(resultlog, implicit_name)(info)
                logged_infos[name][idx] = info

    return {"ids": ids, **params, **logged_infos}


def export(config: dict, pipeline_def, output_path):
    """Write the logs of a pipeline to an Excel file, one sheet per log

    Args:
        config: promptui config of the pipeline; its "logs" entry maps each
            sheet name to a log definition
        pipeline_def: the pipeline class
        output_path: path of the Excel file to write

    Raises:
        ValueError: the config has no (or an empty) "logs" entry
    """
    pipeline_name = f"{pipeline_def.__module__}.{pipeline_def.__name__}"

    log_defs = config.get("logs", {})
    if not log_defs:
        raise ValueError(f"Pipeline {pipeline_name} has no logs to export")

    # Build every dataframe first, so nothing is written if one of them fails
    sheets: Dict[str, pd.DataFrame] = {
        log_name: pd.DataFrame(from_log_to_dict(pipeline_def, log_def))
        for log_name, log_def in log_defs.items()
    }

    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:  # type: ignore
        for log_name, frame in sheets.items():
            frame.to_excel(writer, sheet_name=log_name)


def export_from_dict(
    config: Union[str, dict],
    pipeline: Union[str, Type[BaseComponent]],
    output_path: str,
):
    """Export the logs of one pipeline listed in a promptui config to Excel

    Args:
        config: path to the YAML config file, or the already loaded config dict
        pipeline: dotted name of the pipeline, or the pipeline class itself
        output_path: path to the output Excel file

    Raises:
        TypeError: `config` or `pipeline` has an unsupported type
        ValueError: the pipeline is not listed in the config
    """
    # Resolve the config into a dict
    config_dict: dict
    if isinstance(config, dict):
        config_dict = config
    elif isinstance(config, str):
        with open(config) as f:
            config_dict = yaml.safe_load(f)
    else:
        raise TypeError(f"`config` must be str or dict, not {type(config)}")

    # Work out the key under which the pipeline is listed in the config
    given_as_name = isinstance(pipeline, str)
    if given_as_name:
        pipeline_name = pipeline
    elif isinstance(pipeline, type) and issubclass(pipeline, BaseComponent):
        pipeline_name = f"{pipeline.__module__}.{pipeline.__name__}"
    else:
        raise TypeError(
            f"`pipeline` must be str or subclass of BaseComponent, not {type(pipeline)}"
        )

    if pipeline_name not in config_dict:
        raise ValueError(f"Pipeline {pipeline_name} not found in config file")

    # Only a dotted name needs importing; a class is used as it is
    if given_as_name:
        pipeline_cls = import_dotted_string(pipeline_name, safe=False)
    else:
        pipeline_cls = pipeline

    export(config_dict[pipeline_name], pipeline_cls, output_path)


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/logs.py
================================================
class ResultLog:
    """Callback getter to get the desired log result

    The callback resolution will be as follow:
        1. Explicit string name
        2. Implicitly by: `get_<name>`
        3. Pass through
    """

    @staticmethod
    def _get_input(obj):
        """Return the "input" entry of `obj`"""
        return obj["input"]

    @staticmethod
    def _get_output(obj):
        """Return the "output" entry of `obj`"""
        return obj["output"]


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/themes.py
================================================
from __future__ import annotations

from typing import Iterable

from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes


class John(Base):
    """Gradio theme with neutral hues, small spacing and text, and no corner radius

    All constructor arguments are keyword-only and are forwarded unchanged to the
    base theme; afterwards a fixed set of theme variables is overridden.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.neutral,
        secondary_hue: colors.Color | str = colors.neutral,
        neutral_hue: colors.Color | str = colors.neutral,
        spacing_size: sizes.Size | str = sizes.spacing_sm,
        radius_size: sizes.Size | str = sizes.radius_none,
        text_size: sizes.Size | str = sizes.text_sm,
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "Consolas",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        # NOTE(review): the theme registers itself under the name "monochrome",
        # not "john" -- presumably inherited from the theme it was derived from
        self.name = "monochrome"
        # Values starting with "*" refer to other theme variables; the `_dark`
        # suffix marks the value that is used in dark mode
        super().set(
            # Colors
            slider_color="*neutral_900",
            slider_color_dark="*neutral_500",
            body_text_color="*neutral_900",
            block_label_text_color="*body_text_color",
            block_title_text_color="*body_text_color",
            body_text_color_subdued="*neutral_700",
            background_fill_primary_dark="*neutral_900",
            background_fill_secondary_dark="*neutral_800",
            block_background_fill_dark="*neutral_800",
            input_background_fill_dark="*neutral_700",
            # Button Colors
            button_primary_background_fill="*neutral_900",
            button_primary_background_fill_hover="*neutral_700",
            button_primary_text_color="white",
            button_primary_background_fill_dark="*neutral_600",
            button_primary_background_fill_hover_dark="*neutral_600",
            button_primary_text_color_dark="white",
            button_secondary_background_fill=(
                "linear-gradient(to bottom right, *neutral_100, *neutral_200)"
            ),
            button_secondary_background_fill_hover=(
                "linear-gradient(to bottom right, *neutral_100, *neutral_100)"
            ),
            button_secondary_background_fill_dark=(
                "linear-gradient(to bottom right, *neutral_600, *neutral_700)"
            ),
            button_secondary_background_fill_hover_dark=(
                "linear-gradient(to bottom right, *neutral_600, *neutral_600)"
            ),
            button_cancel_background_fill="*button_primary_background_fill",
            button_cancel_background_fill_hover="*button_primary_background_fill_hover",
            button_cancel_text_color="*button_primary_text_color",
            # Padding
            checkbox_label_padding="*spacing_sm",
            button_large_padding="*spacing_sm",
            button_small_padding="*spacing_sm",
            # Borders
            block_border_width="0px",
            block_border_width_dark="1px",
            shadow_drop_lg="0 1px 4px 0 rgb(0 0 0 / 0.1)",
            block_shadow="*shadow_drop_lg",
            block_shadow_dark="none",
            # Block Labels
            block_title_text_weight="600",
            block_label_text_weight="600",
            block_label_text_size="*text_sm",
        )


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/tunnel.py
================================================
import atexit
import logging
import os
import platform
import stat
import subprocess
from pathlib import Path

import requests

# Version of the tunneling binary; it is part of both the download URL and the
# local filename
VERSION = "1.0"

# Architecture name as used in the remote binary name: "x86_64" is mapped to "amd64"
machine = platform.machine()
if machine == "x86_64":
    machine = "amd64"

# Remote name of the binary: frpc_<os>_<architecture>, plus ".exe" when os.name is "nt"
BINARY_REMOTE_NAME = f"frpc_{platform.system().lower()}_{machine.lower()}"
EXTENSION = ".exe" if os.name == "nt" else ""
# NOTE(review): the host below has no URL scheme and looks like a placeholder --
# confirm the real endpoint before relying on the download
BINARY_URL = (
    "some-endpoint.com" f"/kotaemon/tunneling/{VERSION}/{BINARY_REMOTE_NAME}{EXTENSION}"
)

# The downloaded binary is stored next to this module, with the version in its name
BINARY_FILENAME = f"{BINARY_REMOTE_NAME}_v{VERSION}"
BINARY_FOLDER = Path(__file__).parent
BINARY_PATH = f"{BINARY_FOLDER / BINARY_FILENAME}"


logger = logging.getLogger(__name__)


class Tunnel:
    """Expose a local port to the outside through the frpc tunneling binary

    Attributes:
        proc: the running tunnel process, or None when no tunnel is running
        url: the public URL returned by `run`, or None before `run` was called
        appname: name of the app, used as the subdomain of the public URL
        username: name of the user, combined with `appname` as the proxy name
        local_port: the local port that is exposed
    """

    def __init__(self, appname, username, local_port):
        self.proc = None
        self.url = None
        self.appname = appname
        self.username = username
        self.local_port = local_port

    @staticmethod
    def download_binary():
        """Download the tunneling binary on first use and mark it as executable

        Nothing is done when the binary already exists at `BINARY_PATH`.

        Raises:
            OSError: the server answers 404 (no binary for this platform) or 403
                (the caller is not allowed to download it)
            requests.RequestException: any other failed request, including a timeout
        """
        if Path(BINARY_PATH).exists():
            return

        print("First time setting tunneling...")
        # Without a timeout `requests` waits forever on an unresponsive server
        resp = requests.get(BINARY_URL, timeout=30)

        if resp.status_code == 404:
            raise OSError(
                f"Cannot set up a share link as this platform is incompatible. "
                "Please create a GitHub issue with information about your "
                f"platform: {platform.uname()}"
            )

        if resp.status_code == 403:
            raise OSError(
                "You do not have permission to setup the tunneling. Please "
                "make sure that you are within Cinnamon VPN or within other "
                "approved IPs. If this is new server, please contact @channel "
                "at #llm-productization to add your IP address"
            )

        resp.raise_for_status()

        # Save file data to local copy
        with open(BINARY_PATH, "wb") as file:
            file.write(resp.content)
        # Add the executable bit on top of the existing permissions
        st = os.stat(BINARY_PATH)
        os.chmod(BINARY_PATH, st.st_mode | stat.S_IEXEC)

    def run(self) -> str:
        """Set up the tunnel and return the public URL of the app"""
        if platform.system().lower() == "windows":
            logger.warning("Tunneling is not fully supported on Windows.")

        self.download_binary()
        self.url = self._start_tunnel(BINARY_PATH)
        return self.url

    def kill(self):
        """Terminate the tunnel process, if there is one"""
        if self.proc is not None:
            print(f"Killing tunnel 127.0.0.1:{self.local_port} <> {self.url}")
            self.proc.terminate()
            self.proc = None

    def _start_tunnel(self, binary: str) -> str:
        """Start `binary` as the tunnel process and return the public URL

        The process is stored in `self.proc` and is terminated at interpreter exit.
        """
        # NOTE(review): the server address and the token are hard-coded in the
        # source; consider moving them to configuration / the environment
        command = [
            binary,
            "http",
            "-l",
            str(self.local_port),
            "-i",
            "127.0.0.1",
            "--uc",
            "--sd",
            str(self.appname),
            "-n",
            str(self.appname + self.username),
            "--server_addr",
            "44.229.38.9:7000",
            "--token",
            "Wz807/DyC;#t;#/",
            "--disable_log_color",
        ]
        # The output of the tunnel is never read. Sending it to a pipe would block
        # the process as soon as the pipe buffer is full, so discard it instead.
        self.proc = subprocess.Popen(
            command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
        atexit.register(self.kill)
        return f"https://{self.appname}.promptui.dm.cinnamon.is"


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/ui/__init__.py
================================================
from typing import Union

import gradio as gr
import yaml
from theflow.utils.modules import import_dotted_string

from ..themes import John
from .chat import build_chat_ui
from .pipeline import build_pipeline_ui


def build_from_dict(config: Union[str, dict]):
    """Build the full promptui UI from a config

    Args:
        config: path to a YAML config file, or the already loaded config dict.
            Each key is the dotted path of a pipeline, each value is its UI config.

    Returns:
        the UI of the single pipeline, or a tabbed interface with one tab per
        pipeline when the config does not hold exactly one pipeline

    Raises:
        ValueError: `config` is neither a str nor a dict
    """
    if isinstance(config, dict):
        config_dict: dict = config
    elif isinstance(config, str):
        with open(config) as f:
            config_dict = yaml.safe_load(f)
    else:
        raise ValueError(
            f"config must be either a yaml path or a dict, got {type(config)}"
        )

    demos = []
    for dotted_name, ui_config in config_dict.items():
        pipeline_def = import_dotted_string(dotted_name, safe=False)
        is_chat = ui_config["ui-type"] == "chat"
        builder = build_chat_ui if is_chat else build_pipeline_ui
        demos.append(builder(ui_config, pipeline_def).queue())

    if len(demos) == 1:
        demo = demos[0]
    else:
        demo = gr.TabbedInterface(
            demos,
            tab_names=list(config_dict.keys()),
            title="PromptUI from kotaemon",
            analytics_enabled=False,
            theme=John(),
        )

    demo.queue()

    return demo


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/ui/blocks.py
================================================
from __future__ import annotations

from typing import Any, AsyncGenerator

import anyio
from gradio import ChatInterface
from gradio.components import Component, get_component_instance
from gradio.events import on
from gradio.helpers import special_args
from gradio.routes import Request


class ChatBlock(ChatInterface):
    """The ChatBlock subclasses ChatInterface to provide extra functionalities:

    - Show additional outputs to the chat interface
    - Disallow blank user message

    When additional outputs are configured, the wrapped chat function must
    return a sequence whose first item is the text response and whose remaining
    items are the values for the additional outputs. Without additional
    outputs, the return value is used as the text response as is. Streaming
    (generator) chat functions are not supported.
    """

    def __init__(
        self,
        *args,
        additional_outputs: str | Component | list[str | Component] | None = None,
        **kwargs,
    ):
        """Create the chat block

        Args:
            *args: forwarded to ChatInterface
            additional_outputs: the extra component(s) to update on each submit,
                each one resolved with gradio's `get_component_instance`
            **kwargs: forwarded to ChatInterface
        """
        # Assigned before calling the parent constructor — presumably it is the
        # one wiring up the events (`_setup_events`), which read this attribute.
        if additional_outputs:
            if not isinstance(additional_outputs, list):
                additional_outputs = [additional_outputs]
            self.additional_outputs = [
                get_component_instance(i) for i in additional_outputs  # type: ignore
            ]
        else:
            self.additional_outputs = []

        super().__init__(*args, **kwargs)

    async def _submit_fn(
        self,
        message: str,
        history_with_input: list[list[str | None]],
        request: Request,
        *args,
    ) -> tuple[Any, ...]:
        """Run the chat function and update the history and additional outputs

        Args:
            message: the user message
            history_with_input: the chat history, including the pending message
            request: the gradio request
            *args: the values of the additional inputs, followed by the current
                values of the additional outputs (see `_setup_events`)

        Returns:
            the chatbot value, the chatbot state and one value per additional
                output
        """
        # Split on an explicit index. Slicing with `-len(...)` is wrong when
        # there is no additional output: `args[:-0]` is empty and `args[-0:]`
        # is everything, which would drop all the additional inputs.
        split_at = max(len(args) - len(self.additional_outputs), 0)
        input_args = args[:split_at]
        output_args = args[split_at:]
        if not message:
            # Blank message: keep the history and additional outputs unchanged
            return history_with_input, history_with_input, *output_args

        history = history_with_input[:-1]
        inputs, _, _ = special_args(
            self.fn, inputs=[message, history, *input_args], request=request
        )

        if self.is_async:
            response = await self.fn(*inputs)
        else:
            response = await anyio.to_thread.run_sync(
                self.fn, *inputs, limiter=self.limiter
            )

        output = []
        if self.additional_outputs:
            text = response[0]
            output = response[1:]
        else:
            text = response

        history.append([message, text])
        return history, history, *output

    async def _stream_fn(
        self,
        message: str,
        history_with_input: list[list[str | None]],
        *args,
    ) -> AsyncGenerator:
        """Streaming is not supported by ChatBlock

        Raises:
            NotImplementedError: always
        """
        raise NotImplementedError("Stream function not implemented for ChatBlock")

    def _display_input(
        self, message: str, history: list[list[str | None]]
    ) -> tuple[list[list[str | None]], list[list[str | None]]]:
        """Stop displaying the input message if the message is a blank string"""
        if not message:
            return history, history
        return super()._display_input(message, history)

    def _setup_events(self) -> None:
        """Include additional outputs in the submit event"""
        submit_fn = self._stream_fn if self.is_generator else self._submit_fn
        submit_triggers = (
            [self.textbox.submit, self.submit_btn.click]
            if self.submit_btn
            else [self.textbox.submit]
        )
        submit_event = (
            on(
                submit_triggers,
                self._clear_and_save_textbox,
                [self.textbox],
                [self.textbox, self.saved_input],
                api_name=False,
                queue=False,
            )
            .then(
                self._display_input,
                [self.saved_input, self.chatbot_state],
                [self.chatbot, self.chatbot_state],
                api_name=False,
                queue=False,
            )
            .then(
                submit_fn,
                # The additional outputs are also passed as inputs, so that
                # their current values can be returned unchanged when needed
                [self.saved_input, self.chatbot_state]
                + self.additional_inputs
                + self.additional_outputs,
                [self.chatbot, self.chatbot_state] + self.additional_outputs,
                api_name=False,
            )
        )
        self._setup_stop_events(submit_triggers, submit_event)

        if self.retry_btn:
            retry_event = (
                self.retry_btn.click(
                    self._delete_prev_fn,
                    [self.chatbot_state],
                    [self.chatbot, self.saved_input, self.chatbot_state],
                    api_name=False,
                    queue=False,
                )
                .then(
                    self._display_input,
                    [self.saved_input, self.chatbot_state],
                    [self.chatbot, self.chatbot_state],
                    api_name=False,
                    queue=False,
                )
                .then(
                    submit_fn,
                    [self.saved_input, self.chatbot_state]
                    + self.additional_inputs
                    + self.additional_outputs,
                    [self.chatbot, self.chatbot_state] + self.additional_outputs,
                    api_name=False,
                )
            )
            self._setup_stop_events([self.retry_btn.click], retry_event)

        if self.undo_btn:
            self.undo_btn.click(
                self._delete_prev_fn,
                [self.chatbot_state],
                [self.chatbot, self.saved_input, self.chatbot_state],
                api_name=False,
                queue=False,
            ).then(
                # Put the removed message back into the textbox
                lambda x: x,
                [self.saved_input],
                [self.textbox],
                api_name=False,
                queue=False,
            )

        if self.clear_btn:
            self.clear_btn.click(
                lambda: ([], [], None),
                None,
                [self.chatbot, self.chatbot_state, self.saved_input],
                queue=False,
                api_name=False,
            )

    def _setup_api(self) -> None:
        """Expose the chat function as the `chat` API endpoint"""
        api_fn = self._api_stream_fn if self.is_generator else self._api_submit_fn

        self.fake_api_btn.click(
            api_fn,
            [self.textbox, self.chatbot_state] + self.additional_inputs,
            [self.textbox, self.chatbot_state] + self.additional_outputs,
            api_name="chat",
        )


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py
================================================
import pickle
from datetime import datetime
from pathlib import Path

import gradio as gr
from theflow.storage import storage

from kotaemon.chatbot import ChatConversation
from kotaemon.contribs.promptui.base import get_component
from kotaemon.contribs.promptui.export import export
from kotaemon.contribs.promptui.ui.blocks import ChatBlock

from ..logs import ResultLog

# Markdown help text shown in the "HOW TO" accordion of the chat UI
# (rendered by `construct_chat_ui` below).
USAGE_INSTRUCTION = """## How to use:

1. Set the desired parameters.
2. Click "New chat" to start a chat session with the supplied parameters. This
    set of parameters will persist until the end of the chat session. During an
    ongoing chat session, changing the parameters will not take any effect.
3. Chat and interact with the chat bot on the right panel. You can add any
    additional input (if any), and they will be supplied to the chatbot.
4. During chat, the log of the chat will show up in the "Output" tabs. This is
    empty by default, so if you want to show the log here, tell the AI developers
    to configure the UI settings.
5. When finishing chat, select your preference in the radio box. Click "End chat".
    This will save the chat log and the preference to disk.
6. To compare the result of different run, click "Export" to get an Excel
    spreadsheet summary of different run.

## Support:

In case of errors, you can:

- PromptUI instruction:
    https://github.com/Cinnamon/kotaemon/wiki/Utilities#prompt-engineering-ui
- Create bug fix and make PR at: https://github.com/Cinnamon/kotaemon
- Ping any of @john @tadashi @ian @jacky in Slack channel #llm-productization

## Contribute:

- Follow installation at: https://github.com/Cinnamon/kotaemon/
"""


def construct_chat_ui(
    config, func_new_chat, func_chat, func_end_chat, func_export_to_excel
) -> gr.Blocks:
    """Lay out the chat flavour of the prompt engineering UI

    The components declared in the `inputs`, `params` and `outputs` sections of
    the config are instantiated first, then placed in the layout together with
    the chat block and the session controls.

    Args:
        config: the UI config
        func_new_chat: callback of the "New chat" button
        func_chat: callback handling each chat message
        func_end_chat: callback of the "End chat" button
        func_export_to_excel: callback of the "Export" button

    Returns:
        the queued UI object
    """

    def make_component(component_def, interactive, fallback_label):
        """Instantiate a configured component, forcing its `interactive` flag"""
        component_def.setdefault("params", {})["interactive"] = interactive
        widget = get_component(component_def)
        if hasattr(widget, "label") and not widget.label:  # type: ignore
            widget.label = fallback_label  # type: ignore
        return widget

    # Instantiation order is kept as: inputs, then params, then outputs
    inputs = [
        make_component(definition, True, name)
        for name, definition in config.get("inputs", {}).items()
    ]
    params = [
        make_component(definition, True, name)
        for name, definition in config.get("params", {}).items()
    ]
    outputs = [
        make_component(definition, False, f"Output {position}")
        for position, definition in enumerate(config.get("outputs", []))
    ]

    # Created outside of the layout, and rendered at the right place below
    sess = gr.State(value=None)
    chat = ChatBlock(
        func_chat,
        chatbot=gr.Chatbot(label="Chatbot", show_copy_button=True),
        additional_inputs=[sess],
        additional_outputs=outputs,
    )
    param_state = gr.Textbox(interactive=False)

    with gr.Blocks(analytics_enabled=False, title="Welcome to PromptUI") as demo:
        sess.render()
        with gr.Accordion(label="HOW TO", open=False):
            gr.Markdown(USAGE_INSTRUCTION)

        # Session controls: new chat / end chat / export
        with gr.Row():
            gr.Button("New chat").click(
                func_new_chat,
                inputs=params,
                outputs=[
                    chat.chatbot,
                    chat.chatbot_state,
                    chat.saved_input,
                    param_state,
                    sess,
                    *outputs,
                ],
            )
            with gr.Accordion(label="End chat", open=False):
                likes = gr.Radio(["like", "dislike", "neutral"], value="neutral")
                save_log = gr.Checkbox(
                    value=True,
                    label="Save log",
                    info="If saved, log can be exported later",
                    show_label=True,
                )
                gr.Button("End chat").click(
                    func_end_chat,
                    inputs=[likes, save_log, sess],
                    outputs=[param_state, sess],
                )
            with gr.Accordion(label="Export", open=False):
                exported_file = gr.File(
                    label="Output file", show_label=True, height=100
                )
                gr.Button("Export").click(
                    func_export_to_excel, inputs=[], outputs=exported_file
                )

        # Left column: params and outputs. Right column: the chat itself
        with gr.Row():
            with gr.Column():
                with gr.Tab("Params"):
                    for widget in params:
                        widget.render()
                    with gr.Accordion(label="Session state", open=False):
                        param_state.render()

                with gr.Tab("Outputs"):
                    for widget in outputs:
                        widget.render()
            with gr.Column():
                chat.render()

    return demo.queue()


def build_chat_ui(config, pipeline_def):
    """Build the chat UI

    Defines the callbacks (new chat, chat, end chat, export) bound to the given
    pipeline and config, then hands them to `construct_chat_ui`.

    Args:
        config: the UI config
        pipeline_def: the pipeline definition

    Returns:
        the UI object
    """
    output_dir: Path = Path(storage.url(pipeline_def().config.store_result))
    exported_dir = output_dir.parent / "exported"
    exported_dir.mkdir(parents=True, exist_ok=True)

    # Only the (non-dunder) attributes of the result log can be used as getters
    resultlog = getattr(pipeline_def, "_promptui_resultlog", ResultLog)
    allowed_resultlog_callbacks = {i for i in dir(resultlog) if not i.startswith("__")}

    def new_chat(*args):
        """Start a new chat function

        Args:
            *args: the pipeline init params

        Returns:
            new empty states
        """
        gr.Info("Starting new session...")
        # `params` is optional in the config, as in `construct_chat_ui`
        params_def = config.get("params", {})
        param_dicts = dict(zip(params_def.keys(), args))
        for key in param_dicts:
            # File components: keep the `.name` of the uploaded file object
            if params_def[key].get("component").lower() == "file":
                param_dicts[key] = param_dicts[key].name

        # TODO: currently hard-code as ChatConversation
        pipeline = pipeline_def()
        session = ChatConversation(bot=pipeline)
        session.set(param_dicts)
        session.start_session()

        param_state_str = "\n".join(
            f"- {name}: {value}" for name, value in param_dicts.items()
        )

        gr.Info("New chat session started.")
        return (
            [],
            [],
            None,
            param_state_str,
            session,
            *[None] * len(config.get("outputs", [])),
        )

    def chat(message, history, session, *args):
        """The chat interface

        Args:
            message: the message from the user
            history: the gradio history of the chat
            session: the chat object session
            *args: the additional inputs

        Returns:
            the response from the chatbot

        Raises:
            gr.Error: if there is no active chat session
        """
        # TODO: wrap the input and output of this chat function so that it
        # work with more types of chat conversation than simple text
        if session is None:
            raise gr.Error(
                "No active chat session. Please set the params and click New chat"
            )

        pred = session(message)
        text_response = pred.content

        additional_outputs = []
        for output_def in config.get("outputs", []):
            value = session.last_run.logs(output_def["step"])
            getter = output_def.get("getter", None)
            if getter and getter in allowed_resultlog_callbacks:
                value = getattr(resultlog, getter)(value)
            additional_outputs.append(value)

        return text_response, *additional_outputs

    def end_chat(preference: str, save_log: bool, session):
        """End the chat session

        Args:
            preference: the preference of the user
            save_log: whether to save the result
            session: the chat object session

        Returns:
            the new empty state

        Raises:
            gr.Error: if there is no active chat session
        """
        # "End chat" can be clicked before "New chat": report it the same way
        # as `chat` does, rather than failing with an AttributeError on None
        if session is None:
            raise gr.Error(
                "No active chat session. Please set the params and click New chat"
            )

        gr.Info("Ending session...")
        session.end_session()
        output_dir: Path = (
            Path(storage.url(session.config.store_result)) / session.last_run.id()
        )

        if not save_log:
            if output_dir.exists():
                import shutil

                shutil.rmtree(output_dir)

            session = None
            param_state = ""
            gr.Info("End session without saving log.")
            return param_state, session

        # add preference result to progress
        # NOTE(review): pickle is only safe as long as the run directory is
        # written by this application alone — confirm
        with (output_dir / "progress.pkl").open("rb") as fi:
            progress = pickle.load(fi)
            progress["preference"] = preference
        with (output_dir / "progress.pkl").open("wb") as fo:
            pickle.dump(progress, fo)

        # get the original params
        param_dicts = {name: session.getx(name) for name in config.get("params", {})}
        with (output_dir / "params.pkl").open("wb") as fo:
            pickle.dump(param_dicts, fo)

        session = None
        param_state = ""
        gr.Info("End session and save log.")
        return param_state, session

    def export_func():
        """Export the logs to an Excel file

        Returns:
            the path to the exported file

        Raises:
            gr.Error: if the export fails
        """
        name = (
            f"{pipeline_def.__module__}.{pipeline_def.__name__}_{datetime.now()}.xlsx"
        )
        path = str(exported_dir / name)
        gr.Info(f"Begin exporting {name}...")
        try:
            export(config=config, pipeline_def=pipeline_def, output_path=path)
        except Exception as e:
            # Chain the cause to keep the original traceback in the logs
            raise gr.Error(
                f"Failed to export. Please contact project's AIR: {e}"
            ) from e
        gr.Info(f"Exported {name}. Please go to the `Exported file` tab to download")
        return path

    demo = construct_chat_ui(
        config=config,
        func_new_chat=new_chat,
        func_chat=chat,
        func_end_chat=end_chat,
        func_export_to_excel=export_func,
    )
    return demo


================================================
FILE: libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py
================================================
import pickle
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict

import gradio as gr
import pandas as pd
from theflow.storage import storage

from kotaemon.contribs.promptui.base import get_component
from kotaemon.contribs.promptui.export import export

from ..logs import ResultLog

# Markdown help text shown in the "HOW TO" accordion of the pipeline UI
# (rendered by `construct_pipeline_ui` below).
USAGE_INSTRUCTION = """## How to use:

1. Set the desired parameters.
2. Set the desired inputs.
3. Click "Run" to execute the pipeline with the supplied parameters and inputs
4. The pipeline output will show up in the output panel.
5. Repeat from step 1.
6. To compare the result of different run, click "Export" to get an Excel
    spreadsheet summary of different run.

## Support:

In case of errors, you can:

- PromptUI instruction:
    https://github.com/Cinnamon/kotaemon/wiki/Utilities#prompt-engineering-ui
- Create bug fix and make PR at: https://github.com/Cinnamon/kotaemon
- Ping any of @john @tadashi @ian @jacky in Slack channel #llm-productization

## Contribute:

- Follow installation at: https://github.com/Cinnamon/kotaemon/
"""


def construct_pipeline_ui(
    config, func_run, func_save, func_load_params, func_activate_params, func_export
) -> gr.Blocks:
    """Lay out the pipeline flavour of the prompt engineering UI

    The components declared in the `inputs`, `params` and `outputs` sections of
    the config are instantiated first, then placed in the layout together with
    the params history table and the run / export buttons.

    Args:
        config: the UI config
        func_run: callback of the "Run" button
        func_save: callback of the "Save params" button
        func_load_params: callback of the "Reload params" button
        func_activate_params: callback fired when a cell of the params history
            table is selected
        func_export: callback of the "Export" button

    Returns:
        the UI object
    """

    def make_component(component_def, interactive, fallback_label):
        """Instantiate a configured component, forcing its `interactive` flag"""
        component_def.setdefault("params", {})["interactive"] = interactive
        widget = get_component(component_def)
        if hasattr(widget, "label") and not widget.label:  # type: ignore
            widget.label = fallback_label  # type: ignore
        return widget

    # Instantiation order is kept as: inputs, then params, then outputs
    inputs = [
        make_component(definition, True, name)
        for name, definition in config.get("inputs", {}).items()
    ]
    params = [
        make_component(definition, True, name)
        for name, definition in config.get("params", {}).items()
    ]
    outputs = [
        make_component(definition, False, f"Output {position}")
        for position, definition in enumerate(config.get("outputs", []))
    ]

    # Created outside of the layout, and rendered at the right place below
    exported_file = gr.File(label="Output file", show_label=True)
    history_dataframe = gr.DataFrame(wrap=True)

    with gr.Blocks(analytics_enabled=False, title="Welcome to PromptUI") as demo:
        with gr.Accordion(label="HOW TO", open=False):
            gr.Markdown(USAGE_INSTRUCTION)

        with gr.Accordion(label="Params History", open=False):
            with gr.Row():
                gr.Button("Save params").click(
                    func_save, inputs=params, outputs=history_dataframe
                )
                gr.Button("Reload params").click(
                    func_load_params, inputs=[], outputs=history_dataframe
                )
            history_dataframe.render()
            history_dataframe.select(
                func_activate_params, inputs=params, outputs=params
            )

        with gr.Row():
            gr.Button("Run").click(func_run, inputs=inputs + params, outputs=outputs)
            gr.Button(
                "Export (Result will be in Exported file next to Output)"
            ).click(func_export, inputs=[], outputs=exported_file)

        # Left column: params and inputs. Right column: outputs and export
        with gr.Row():
            with gr.Column():
                if params:
                    with gr.Tab("Params"):
                        for widget in params:
                            widget.render()
                if inputs:
                    with gr.Tab("Inputs"):
                        for widget in inputs:
                            widget.render()
                if not (params or inputs):
                    gr.Text("No params or inputs")
            with gr.Column():
                with gr.Tab("Outputs"):
                    for widget in outputs:
                        widget.render()
                with gr.Tab("Exported file"):
                    exported_file.render()

    return demo


def load_saved_params(path: str) -> Dict:
    """Collect every pickled params file of a directory into one table

    Args:
        path: the directory holding the `*.pkl` files, each one a pickled dict

    Returns:
        a column-oriented dict: `_id` lists the file stems (sorted by file
            path) and every other key lists the value stored in each file, or
            None when a file does not have that key
    """
    pickle_paths = sorted(Path(path).glob("*.pkl"))
    n_files = len(pickle_paths)

    table: Dict[str, Any] = {"_id": [None] * n_files}
    for row, pickle_path in enumerate(pickle_paths):
        with pickle_path.open("rb") as handle:
            record = pickle.load(handle)

        table["_id"][row] = pickle_path.stem
        for column, cell in record.items():
            # A column seen for the first time starts blank for all the files
            table.setdefault(column, [None] * n_files)[row] = cell

    return table


def build_pipeline_ui(config: dict, pipeline_def):
    """Build the pipeline UI

    Defines the callbacks (run, save / reload / activate params, export) bound
    to the given pipeline and config, then hands them to
    `construct_pipeline_ui`.

    Args:
        config: the UI config
        pipeline_def: the pipeline definition

    Returns:
        the UI object
    """
    input_names = list(config.get("inputs", {}).keys())
    param_names = list(config.get("params", {}).keys())
    output_defs = config.get("outputs", [])

    # Exported files and saved params live next to the result directory
    result_root: Path = Path(storage.url(pipeline_def().config.store_result))
    exported_dir = result_root.parent / "exported"
    exported_dir.mkdir(parents=True, exist_ok=True)

    save_dir = result_root.parent.joinpath(
        "saved", f"{pipeline_def.__module__}.{pipeline_def.__name__}"
    )
    save_dir.mkdir(parents=True, exist_ok=True)

    # Only the (non-dunder) attributes of the result log can be used as getters
    resultlog = getattr(pipeline_def, "_promptui_resultlog", ResultLog)
    allowed_resultlog_callbacks = {
        attr for attr in dir(resultlog) if not attr.startswith("__")
    }

    def run_func(*args):
        """Run the pipeline: `args` holds the input values, then the params"""
        n_inputs = len(input_names)
        inputs = dict(zip(input_names, args[:n_inputs]))
        params = dict(zip(param_names, args[n_inputs:]))

        pipeline = pipeline_def()
        pipeline.set(params)
        pipeline(**inputs)

        # Keep the params of this run together with its results
        params_url = storage.url(
            pipeline.config.store_result, pipeline.last_run.id(), "params.pkl"
        )
        with storage.open(params_url, "wb") as f:
            pickle.dump(params, f)

        if not output_defs:
            return None

        collected = []
        for output_def in output_defs:
            logged = pipeline.last_run.logs(output_def["step"])
            getter = output_def.get("getter", None)
            if getter and getter in allowed_resultlog_callbacks:
                logged = getattr(resultlog, getter)(logged)
            collected.append(logged)

        # A single output is returned bare rather than as a 1-item list
        return collected[0] if len(output_defs) == 1 else collected

    def save_func(*args):
        """Save the current params, and return the refreshed params history"""
        params = dict(zip(param_names, args))
        with open(save_dir / f"{int(time.time())}.pkl", "wb") as f:
            pickle.dump(params, f)
        gr.Info("Params saved")

        return pd.DataFrame(load_saved_params(str(save_dir)))

    def load_params_func():
        """Return the params history"""
        return pd.DataFrame(load_saved_params(str(save_dir)))

    def activate_params_func(ev: gr.SelectData, *args):
        """Copy the selected cell of the params history into its param"""
        saved = load_saved_params(str(save_dir))
        current_values = list(args)
        if ev.value is None:
            gr.Info(f'Blank value: "{ev.value}". Skip')
            return current_values

        # `ev.index` is used as (row, column) of the selected cell
        column = list(saved.keys())[ev.index[1]]
        if column not in param_names:
            gr.Info(f'Column "{column}" not in params. Skip')
            return current_values

        selected = saved[column][ev.index[0]]
        if selected is None:
            gr.Info(f'Blank value: "{ev.value}". Skip')
            return current_values

        current_values[param_names.index(column)] = selected
        return current_values

    def export_func():
        """Export the logs to an Excel file, and return its path"""
        name = (
            f"{pipeline_def.__module__}.{pipeline_def.__name__}_{datetime.now()}.xlsx"
        )
        path = str(exported_dir / name)
        gr.Info(f"Begin exporting {name}...")
        try:
            export(config=config, pipeline_def=pipeline_def, output_path=path)
        except Exception as e:
            raise gr.Error(f"Failed to export. Please contact project's AIR: {e}")
        gr.Info(f"Exported {name}. Please go to the `Exported file` tab to download")
        return path

    return construct_pipeline_ui(
        config,
        func_run=run_func,
        func_save=save_func,
        func_load_params=load_params_func,
        func_activate_params=activate_params_func,
        func_export=export_func,
    )


================================================
FILE: libs/kotaemon/kotaemon/embeddings/__init__.py
================================================
from .base import BaseEmbeddings
from .endpoint_based import EndpointEmbeddings
from .fastembed import FastEmbedEmbeddings
from .langchain_based import (
    LCAzureOpenAIEmbeddings,
    LCCohereEmbeddings,
    LCGoogleEmbeddings,
    LCHuggingFaceEmbeddings,
    LCMistralEmbeddings,
    LCOpenAIEmbeddings,
)
from .openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from .tei_endpoint_embed import TeiEndpointEmbeddings
from .voyageai import VoyageAIEmbeddings

# Public API of the embeddings package: the names re-exported from the
# submodules imported above.
__all__ = [
    "BaseEmbeddings",
    "EndpointEmbeddings",
    "TeiEndpointEmbeddings",
    "LCOpenAIEmbeddings",
    "LCAzureOpenAIEmbeddings",
    "LCCohereEmbeddings",
    "LCHuggingFaceEmbeddings",
    "LCGoogleEmbeddings",
    "LCMistralEmbeddings",
    "OpenAIEmbeddings",
    "AzureOpenAIEmbeddings",
    "FastEmbedEmbeddings",
    "VoyageAIEmbeddings",
]


================================================
FILE: libs/kotaemon/kotaemon/embeddings/base.py
================================================
from __future__ import annotations

from kotaemon.base import BaseComponent, Document, DocumentWithEmbedding


class BaseEmbeddings(BaseComponent):
    """Common interface of the embedding components

    Subclasses are expected to override `invoke` (and `ainvoke`): both raise
    NotImplementedError here, and `run` simply delegates to `invoke`.
    """

    def run(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the text(s) by delegating to `invoke`"""
        return self.invoke(text, *args, **kwargs)

    def invoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the text(s). To be implemented by subclasses"""
        raise NotImplementedError

    async def ainvoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the text(s) asynchronously. To be implemented by subclasses"""
        raise NotImplementedError

    def prepare_input(
        self, text: str | list[str] | Document | list[Document]
    ) -> list[Document]:
        """Normalize the supported input forms into a list of Document

        A single str or Document becomes a 1-item list, each item of a list is
        wrapped in a Document, and anything else is returned untouched.
        """
        if isinstance(text, (str, Document)):
            documents = [Document(content=text)]
        elif isinstance(text, list):
            documents = [Document(content=item) for item in text]
        else:
            documents = text
        return documents


================================================
FILE: libs/kotaemon/kotaemon/embeddings/endpoint_based.py
================================================
import requests

from kotaemon.base import Document, DocumentWithEmbedding

from .base import BaseEmbeddings


class EndpointEmbeddings(BaseEmbeddings):
    """
    An Embeddings component that uses an OpenAI API compatible endpoint.

    Attributes:
        endpoint_url (str): The url of an OpenAI API compatible endpoint.
    """

    endpoint_url: str

    def run(
        self, text: str | list[str] | Document | list[Document]
    ) -> list[DocumentWithEmbedding]:
        """
        Generate embeddings from text, sending one request per item.

        Args:
            text (str | list[str] | Document | list[Document]): text to generate
                embeddings from

        Returns:
            list[DocumentWithEmbedding]: embeddings

        Raises:
            requests.HTTPError: if the endpoint answers with an error status
        """
        items = text if isinstance(text, list) else [text]

        outputs = []

        for item in items:
            response = requests.post(self.endpoint_url, json={"input": str(item)})
            # Report HTTP failures as such, rather than as an unrelated
            # KeyError / JSON decoding error when reading the payload below
            response.raise_for_status()
            payload = response.json()
            outputs.append(
                DocumentWithEmbedding(
                    text=str(item),
                    embedding=payload["data"][0]["embedding"],
                    total_tokens=payload["usage"]["total_tokens"],
                    prompt_tokens=payload["usage"]["prompt_tokens"],
                )
            )

        return outputs


================================================
FILE: libs/kotaemon/kotaemon/embeddings/fastembed.py
================================================
from typing import TYPE_CHECKING, Optional

from kotaemon.base import Document, DocumentWithEmbedding, Param

from .base import BaseEmbeddings

if TYPE_CHECKING:
    from fastembed import TextEmbedding


class FastEmbedEmbeddings(BaseEmbeddings):
    """Utilize fastembed library for embeddings locally without GPU.

    Supported model: https://qdrant.github.io/fastembed/examples/Supported_Models/
    Code: https://github.com/qdrant/fastembed
    """

    model_name: str = Param(
        "BAAI/bge-small-en-v1.5",
        help=(
            "Model name for fastembed. Please refer "
            "[here](https://qdrant.github.io/fastembed/examples/Supported_Models/) "
            "for the list of supported models."
        ),
        required=True,
    )
    batch_size: int = Param(
        256,
        help="Batch size for embeddings. Higher values use more memory, but are faster",
    )
    parallel: Optional[int] = Param(
        None,
        help=(
            "Number of threads to use for embeddings. "
            "If > 1, data-parallel encoding will be used. "
            "If 0, use all available CPUs. "
            "If None, use default onnxruntime threading. "
            "Defaults to None."
        ),
    )

    @Param.auto()
    def client_(self) -> "TextEmbedding":
        """Create the fastembed model for `model_name`

        fastembed is imported here, so that it is only needed once the client
        is actually built.

        Raises:
            ImportError: if fastembed is not installed
        """
        try:
            from fastembed import TextEmbedding
        except ImportError:
            raise ImportError("Please install FastEmbed: `pip install fastembed`")

        return TextEmbedding(model_name=self.model_name)

    def invoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the text(s), returning one embedded document per input"""
        docs = self.prepare_input(text)
        contents = [doc.content for doc in docs]
        vectors = self.client_.embed(
            contents, batch_size=self.batch_size, parallel=self.parallel
        )

        embedded = []
        for doc, vector in zip(docs, vectors):
            embedded.append(DocumentWithEmbedding(content=doc, embedding=list(vector)))
        return embedded

    async def ainvoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Fastembed does not support async API."""
        # Fall back to the synchronous implementation
        return self.invoke(text, *args, **kwargs)


================================================
FILE: libs/kotaemon/kotaemon/embeddings/langchain_based.py
================================================
from typing import Optional

from kotaemon.base import DocumentWithEmbedding, Param

from .base import BaseEmbeddings


class LCEmbeddingMixin:
    """Mixin that exposes a Langchain embedding class as a kotaemon embedding.

    Subclasses override `_get_lc_class` to return the Langchain class. The
    keyword arguments given to `__init__` are kept in `self._kwargs` and used
    to build the wrapped Langchain object stored in `self._obj`.
    """

    # Attributes created by `__init__`. `__getattr__` must never try to resolve
    # them through `self._kwargs` / `self._obj`, otherwise looking up any missing
    # attribute on a not-yet-initialised instance recurses without bound.
    _LC_INTERNAL_ATTRS = ("_lc_class", "_obj", "_kwargs")

    def _get_lc_class(self):
        """Return the Langchain embedding class to wrap (must be overridden)."""
        raise NotImplementedError(
            "Please return the relevant Langchain class in _get_lc_class"
        )

    def __init__(self, **params):
        """Instantiate the wrapped Langchain object from `params`."""
        # `_lc_class` has to be set first: `__setattr__` consults it for every
        # other attribute assignment.
        self._lc_class = self._get_lc_class()
        self._obj = self._lc_class(**params)
        self._kwargs: dict = params

        super().__init__()

    def run(self, text):
        """Embed the input and pair each document with its embedding.

        Args:
            text: input accepted by `prepare_input`.

        Returns:
            A list of `DocumentWithEmbedding`, one per prepared document.
        """
        input_docs = self.prepare_input(text)
        input_ = [doc.text for doc in input_docs]

        embeddings = self._obj.embed_documents(input_)

        return [
            DocumentWithEmbedding(content=doc, embedding=each_embedding)
            for doc, each_embedding in zip(input_docs, embeddings)
        ]

    def __repr__(self):
        """Return `ClassName(key=repr(value), ...)` built from the stored kwargs."""
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = repr(value_obj)
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __str__(self):
        """Like `__repr__`, but uses `str` and shortens values over 20 characters."""
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = str(value_obj)
            if len(value) > 20:
                value = f"{value[:15]}..."
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __setattr__(self, name, value):
        """Route Langchain fields into `_kwargs` and rebuild the wrapped object."""
        if name == "_lc_class":
            return super().__setattr__(name, value)

        if name in self._lc_class.__fields__:
            # A Langchain field changed: record it and re-create the object so
            # the new value takes effect.
            self._kwargs[name] = value
            self._obj = self._lc_class(**self._kwargs)
        else:
            super().__setattr__(name, value)

    def __getattr__(self, name):
        """Resolve missing attributes from the stored kwargs, then the wrapped object.

        Raises:
            AttributeError: if `name` is one of the mixin's own internal
                attributes and has not been set yet.
        """
        if name in self._LC_INTERNAL_ATTRS:
            # Reached only when the attribute is genuinely absent (for example
            # on an instance created with `__new__`); fail fast instead of
            # recursing through `self._kwargs` / `self._obj`.
            raise AttributeError(name)
        if name in self._kwargs:
            return self._kwargs[name]
        return getattr(self._obj, name)

    def dump(self, *args, **kwargs):
        """Serialize the component as its dotted type path plus the stored kwargs."""
        from theflow.utils.modules import serialize

        params = {key: serialize(value) for key, value in self._kwargs.items()}
        return {
            "__type__": f"{self.__module__}.{self.__class__.__qualname__}",
            **params,
        }

    def specs(self, path: str):
        """Describe the parameter named by `path`.

        Args:
            path: a single parameter name; leading/trailing dots are stripped.

        Returns:
            A `theflow.base.ParamAttr` spec dict when `path` is a field of the
            wrapped Langchain class.

        Raises:
            ValueError: if `path` contains a dot or is not a known field.
        """
        path = path.strip(".")
        if "." in path:
            raise ValueError("path should not contain '.'")

        if path in self._lc_class.__fields__:
            return {
                "__type__": "theflow.base.ParamAttr",
                "refresh_on_set": True,
                "strict_type": True,
            }

        raise ValueError(f"Invalid param {path}")


class LCOpenAIEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """Langchain OpenAI embedding wrapper exposing the key parameters."""

    def __init__(
        self,
        model: str = "text-embedding-ada-002",
        openai_api_version: Optional[str] = None,
        openai_api_base: Optional[str] = None,
        openai_api_type: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        request_timeout: Optional[float] = None,
        **params,
    ):
        """Forward the named parameters and any extra `params` to the mixin."""
        lc_params = dict(
            model=model,
            openai_api_version=openai_api_version,
            openai_api_base=openai_api_base,
            openai_api_type=openai_api_type,
            openai_api_key=openai_api_key,
            request_timeout=request_timeout,
        )
        lc_params.update(params)
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return `OpenAIEmbeddings`, preferring the `langchain_openai` package."""
        try:
            from langchain_openai import OpenAIEmbeddings as lc_cls
        except ImportError:
            from langchain.embeddings import OpenAIEmbeddings as lc_cls

        return lc_cls


class LCAzureOpenAIEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """Langchain AzureOpenAI embedding wrapper exposing the key parameters."""

    def __init__(
        self,
        azure_endpoint: Optional[str] = None,
        deployment: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        api_version: Optional[str] = None,
        request_timeout: Optional[float] = None,
        **params,
    ):
        """Forward the named parameters and any extra `params` to the mixin."""
        lc_params = dict(
            azure_endpoint=azure_endpoint,
            deployment=deployment,
            api_version=api_version,
            openai_api_key=openai_api_key,
            request_timeout=request_timeout,
        )
        lc_params.update(params)
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return `AzureOpenAIEmbeddings`, preferring the `langchain_openai` package."""
        try:
            from langchain_openai import AzureOpenAIEmbeddings as lc_cls
        except ImportError:
            from langchain.embeddings import AzureOpenAIEmbeddings as lc_cls

        return lc_cls


class LCCohereEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """Langchain Cohere embedding wrapper exposing the key parameters."""

    cohere_api_key: str = Param(
        default=None,
        required=True,
        help="API key (https://dashboard.cohere.com/api-keys)",
    )
    model: str = Param(
        default=None,
        required=True,
        help="Model name to use (https://docs.cohere.com/docs/models)",
    )
    user_agent: str = Param(
        default="default", required=True, help="User agent (leave default)"
    )

    def __init__(
        self,
        model: str = "embed-english-v2.0",
        cohere_api_key: Optional[str] = None,
        truncate: Optional[str] = None,
        request_timeout: Optional[float] = None,
        **params,
    ):
        """Forward the named parameters and any extra `params` to the mixin."""
        lc_params = dict(
            model=model,
            cohere_api_key=cohere_api_key,
            truncate=truncate,
            request_timeout=request_timeout,
        )
        lc_params.update(params)
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return `CohereEmbeddings`, preferring the `langchain_cohere` package."""
        try:
            from langchain_cohere import CohereEmbeddings as lc_cls
        except ImportError:
            from langchain.embeddings import CohereEmbeddings as lc_cls

        return lc_cls


class LCHuggingFaceEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """Langchain HuggingFace embedding wrapper exposing the key parameters."""

    model_name: str = Param(
        default=None,
        required=True,
        help=(
            "Model name to use (https://huggingface.co/models?"
            "pipeline_tag=sentence-similarity&sort=trending)"
        ),
    )

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-mpnet-base-v2",
        **params,
    ):
        """Forward `model_name` and any extra `params` to the mixin."""
        lc_params = dict(model_name=model_name)
        lc_params.update(params)
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return `HuggingFaceBgeEmbeddings`, preferring `langchain_community`."""
        try:
            from langchain_community.embeddings import (
                HuggingFaceBgeEmbeddings as lc_cls,
            )
        except ImportError:
            from langchain.embeddings import HuggingFaceBgeEmbeddings as lc_cls

        return lc_cls


class LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """Langchain Google GenAI embedding wrapper exposing the key parameters."""

    google_api_key: str = Param(
        default=None,
        required=True,
        help="API key (https://aistudio.google.com/app/apikey)",
    )
    model: str = Param(
        default="models/text-embedding-004",
        required=True,
        help="Model name to use (https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding)",  # noqa
    )

    def __init__(
        self,
        model: str = "models/text-embedding-004",
        google_api_key: Optional[str] = None,
        **params,
    ):
        """Forward the named parameters and any extra `params` to the mixin."""
        lc_params = dict(model=model, google_api_key=google_api_key)
        lc_params.update(params)
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return `GoogleGenerativeAIEmbeddings` from `langchain_google_genai`.

        Raises:
            ImportError: if the class cannot be imported.
        """
        try:
            from langchain_google_genai import GoogleGenerativeAIEmbeddings
        except ImportError:
            raise ImportError("Please install langchain-google-genai")
        else:
            return GoogleGenerativeAIEmbeddings


class LCMistralEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """LangChain MistralAI embedding wrapper exposing the key parameters."""

    api_key: str = Param(
        default=None,
        required=True,
        help="API key (https://console.mistral.ai/api-keys)",
    )
    model: str = Param(
        default="mistral-embed",
        required=True,
        help="Model name to use ('mistral-embed')",
    )

    def __init__(
        self,
        model: str = "mistral-embed",
        api_key: Optional[str] = None,
        **params,
    ):
        """Forward the named parameters and any extra `params` to the mixin."""
        lc_params = dict(model=model, api_key=api_key)
        lc_params.update(params)
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return `MistralAIEmbeddings` from `langchain_mistralai`.

        Raises:
            ImportError: if the class cannot be imported.
        """
        try:
            from langchain_mistralai import MistralAIEmbeddings
        except ImportError:
            raise ImportError(
                "Please install langchain_mistralai: "
                "`pip install -U langchain_mistralai`"
            )
        else:
            return MistralAIEmbeddings


================================================
FILE: libs/kotaemon/kotaemon/embeddings/openai.py
================================================
from itertools import islice
from typing import Optional

import numpy as np
import openai
import tiktoken
from tenacity import (
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_random_exponential,
)
from theflow.utils.modules import import_dotted_string

from kotaemon.base import Param

from .base import BaseEmbeddings, Document, DocumentWithEmbedding


def split_text_by_chunk_size(text: str, chunk_size: int) -> list[list[int]]:
    """Tokenize the text and cut the token sequence into consecutive chunks.

    Args:
        text: text to split
        chunk_size: maximum number of tokens in each chunk

    Returns:
        list of chunks, each a list of `cl100k_base` token ids; the last chunk
        may be shorter than `chunk_size`
    """
    token_stream = iter(tiktoken.get_encoding("cl100k_base").encode(text))

    def next_chunk() -> list[int]:
        return list(islice(token_stream, chunk_size))

    # Two-argument `iter` keeps calling `next_chunk` until it returns an empty
    # list, i.e. until the token stream is exhausted.
    return list(iter(next_chunk, []))


class BaseOpenAIEmbeddings(BaseEmbeddings):
    """Base interface for OpenAI embedding models, using the openai library.

    To subclass this class:

        - Implement the `prepare_client` method to return the OpenAI client
        - Implement the `openai_response` method to return the OpenAI response
        - Implement the params relate to the OpenAI client
    """

    _dependencies = ["openai"]

    api_key: str = Param(None, help="API key", required=True)
    timeout: Optional[float] = Param(None, help="Timeout for the API request.")
    max_retries: Optional[int] = Param(
        None, help="Maximum number of retries for the API request."
    )

    dimensions: Optional[int] = Param(
        None,
        help=(
            "The number of dimensions the resulting output embeddings should have. "
            "Only supported in `text-embedding-3` and later models."
        ),
    )
    context_length: Optional[int] = Param(
        None, help="The maximum context length of the embedding model"
    )

    @Param.auto(depends_on=["max_retries"])
    def max_retries_(self):
        """Return `max_retries`, falling back to the openai library default."""
        if self.max_retries is None:
            from openai._constants import DEFAULT_MAX_RETRIES

            return DEFAULT_MAX_RETRIES
        return self.max_retries

    def prepare_client(self, async_version: bool = False):
        """Get the OpenAI client

        Args:
            async_version (bool): Whether to get the async version of the client
        """
        raise NotImplementedError

    def openai_response(self, client, **kwargs):
        """Get the openai response"""
        raise NotImplementedError

    def _prepare_request_input(
        self, docs: list[Document]
    ) -> tuple[list[str | list[int]], list[tuple[int, int]]]:
        """Build the request payload and remember which entries belong to which doc.

        When `context_length` is set, each document is tokenized and split into
        chunks of at most that many tokens; otherwise the raw text is sent. Empty
        text is replaced by a single space in both modes.

        Returns:
            A pair `(request_input, spans)` where `spans[i]` is the half-open
            `(start, end)` range of `request_input` entries produced by `docs[i]`.
        """
        request_input: list[str | list[int]] = []
        spans: list[tuple[int, int]] = []
        for doc in docs:
            start = len(request_input)
            content = doc.text or " "
            if self.context_length:
                request_input.extend(
                    split_text_by_chunk_size(content, self.context_length)
                )
            else:
                request_input.append(content)
            spans.append((start, len(request_input)))
        return request_input, spans

    def _collect_embeddings(
        self,
        docs: list[Document],
        request_input: list[str | list[int]],
        spans: list[tuple[int, int]],
        response: dict,
    ) -> list[DocumentWithEmbedding]:
        """Turn the API response into one `DocumentWithEmbedding` per document.

        A document that produced several chunks gets the average of its chunk
        embeddings, weighted by chunk length and rescaled to unit norm.
        """
        ordered = sorted(response["data"], key=lambda item: item["index"])

        output = []
        for doc, (start, end) in zip(docs, spans):
            vectors: list[list[float]] = [
                item["embedding"] for item in ordered[start:end]
            ]
            if len(vectors) == 1:
                output.append(DocumentWithEmbedding(embedding=vectors[0], content=doc))
                continue

            chunk_lens = [len(chunk) for chunk in request_input[start:end]]
            emb = np.average(vectors, axis=0, weights=chunk_lens)
            emb = emb / np.linalg.norm(emb)
            output.append(DocumentWithEmbedding(embedding=emb.tolist(), content=doc))

        return output

    def invoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the input synchronously.

        Args:
            text: input accepted by `prepare_input`.
            **kwargs: forwarded to `openai_response`.

        Returns:
            One `DocumentWithEmbedding` per prepared input document, in order.
        """
        docs = self.prepare_input(text)
        client = self.prepare_client(async_version=False)

        request_input, spans = self._prepare_request_input(docs)
        resp = self.openai_response(client, input=request_input, **kwargs).dict()
        return self._collect_embeddings(docs, request_input, spans, resp)

    async def ainvoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the input using the async client.

        Applies the same input preparation (including `context_length`
        chunking) and output assembly as `invoke`.
        """
        docs = self.prepare_input(text)
        client = self.prepare_client(async_version=True)

        request_input, spans = self._prepare_request_input(docs)
        # The result of `openai_response` is awaited here; the await must finish
        # before `.dict()` is looked up, hence the explicit two-step form.
        raw_resp = await self.openai_response(client, input=request_input, **kwargs)
        resp = raw_resp.dict()
        return self._collect_embeddings(docs, request_input, spans, resp)


class OpenAIEmbeddings(BaseOpenAIEmbeddings):
    """OpenAI embedding model accessed through the `openai` client."""

    base_url: Optional[str] = Param(None, help="OpenAI base URL")
    organization: Optional[str] = Param(None, help="OpenAI organization")
    model: str = Param(
        None,
        help=(
            "ID of the model to use. You can go to [Model overview](https://platform."
            "openai.com/docs/models/overview) to see the available models."
        ),
        required=True,
    )

    def prepare_client(self, async_version: bool = False):
        """Create the OpenAI client from this component's parameters.

        Args:
            async_version (bool): return `AsyncOpenAI` when true, `OpenAI` otherwise
        """
        if async_version:
            from openai import AsyncOpenAI as client_cls
        else:
            from openai import OpenAI as client_cls

        return client_cls(
            api_key=self.api_key,
            organization=self.organization,
            base_url=self.base_url,
            timeout=self.timeout,
            max_retries=self.max_retries_,
        )

    @retry(
        retry=retry_if_not_exception_type(
            (openai.NotFoundError, openai.BadRequestError)
        ),
        wait=wait_random_exponential(min=1, max=40),
        stop=stop_after_attempt(6),
    )
    def openai_response(self, client, **kwargs):
        """Call `client.embeddings.create` with the model, dimensions and kwargs.

        `dimensions` is only sent when set; explicit `kwargs` take precedence
        over the defaults built here.
        """
        request: dict = {"model": self.model}
        if self.dimensions:
            request["dimensions"] = self.dimensions

        return client.embeddings.create(**{**request, **kwargs})


class AzureOpenAIEmbeddings(BaseOpenAIEmbeddings):
    """Azure OpenAI embedding model accessed through the `openai` client."""

    azure_endpoint: str = Param(
        None,
        help=(
            "HTTPS endpoint for the Azure OpenAI model. The azure_endpoint, "
            "azure_deployment, and api_version parameters are used to construct "
            "the full URL for the Azure OpenAI model."
        ),
        required=True,
    )
    azure_deployment: str = Param(None, help="Azure deployment name", required=True)
    api_version: str = Param(None, help="Azure model version", required=True)
    azure_ad_token: Optional[str] = Param(None, help="Azure AD token")
    azure_ad_token_provider: Optional[str] = Param(None, help="Azure AD token provider")

    @Param.auto(depends_on=["azure_ad_token_provider"])
    def azure_ad_token_provider_(self):
        """Import the token provider from its dotted path; `None` if not a string."""
        provider_path = self.azure_ad_token_provider
        if not isinstance(provider_path, str):
            return None
        return import_dotted_string(provider_path, safe=False)

    def prepare_client(self, async_version: bool = False):
        """Create the Azure OpenAI client from this component's parameters.

        Args:
            async_version (bool): return `AsyncAzureOpenAI` when true,
                `AzureOpenAI` otherwise
        """
        if async_version:
            from openai import AsyncAzureOpenAI as client_cls
        else:
            from openai import AzureOpenAI as client_cls

        return client_cls(
            azure_endpoint=self.azure_endpoint,
            api_version=self.api_version,
            api_key=self.api_key,
            azure_ad_token=self.azure_ad_token,
            azure_ad_token_provider=self.azure_ad_token_provider_,
            timeout=self.timeout,
            max_retries=self.max_retries_,
        )

    @retry(
        retry=retry_if_not_exception_type(
            (openai.NotFoundError, openai.BadRequestError)
        ),
        wait=wait_random_exponential(min=1, max=40),
        stop=stop_after_attempt(6),
    )
    def openai_response(self, client, **kwargs):
        """Call `client.embeddings.create` using the deployment name as the model.

        `dimensions` is only sent when set; explicit `kwargs` take precedence
        over the defaults built here.
        """
        request: dict = {"model": self.azure_deployment}
        if self.dimensions:
            request["dimensions"] = self.dimensions

        return client.embeddings.create(**{**request, **kwargs})


================================================
FILE: libs/kotaemon/kotaemon/embeddings/tei_endpoint_embed.py
================================================
import aiohttp
import requests

from kotaemon.base import Document, DocumentWithEmbedding, Param

from .base import BaseEmbeddings

# Module-level `requests` session, created at import time and used by the
# synchronous `TeiEndpointEmbeddings.invoke` to post embedding requests.
session = requests.session()


class TeiEndpointEmbeddings(BaseEmbeddings):
    """An Embeddings component that uses an
    TEI (Text-Embedding-Inference) API compatible endpoint.

    Ref: https://github.com/huggingface/text-embeddings-inference

    Inputs are sent in batches of at most 6 items per request. A response with
    an HTTP error status raises instead of being interpreted as embeddings.

    Attributes:
        endpoint_url (str): The url of an TEI
            (Text-Embedding-Inference) API compatible endpoint.
        normalize (bool): Whether to normalize embeddings to unit length.
        truncate (bool): Whether to truncate embeddings
            to a fixed/default length.
    """

    endpoint_url: str = Param(None, help="TEI embedding service api base URL")
    normalize: bool = Param(
        True,
        help="Normalize embeddings to unit length",
    )
    truncate: bool = Param(
        True,
        help="Truncate embeddings to a fixed/default length",
    )

    def _request_payload(self, inputs: list[str]) -> dict:
        """Build the JSON body sent to the endpoint for one batch."""
        return {
            "inputs": inputs,
            "normalize": self.normalize,
            "truncate": self.truncate,
        }

    def _iter_batches(self, text):
        """Yield the prepared inputs' contents in batches of at most 6 items.

        Nothing is yielded for empty input, so no request is made in that case.
        """
        if not isinstance(text, list):
            text = [text]
        docs = self.prepare_input(text)

        batch_size = 6
        for start in range(0, len(docs), batch_size):
            yield [doc.content for doc in docs[start : start + batch_size]]

    async def client_(self, inputs: list[str]):
        """Post one batch with aiohttp and return the decoded JSON response.

        Raises:
            aiohttp.ClientResponseError: if the endpoint answers with an HTTP
                error status.
        """
        async with aiohttp.ClientSession() as http:
            async with http.post(
                url=self.endpoint_url,
                json=self._request_payload(inputs),
            ) as resp:
                # Fail loudly: an error body must not be zipped as embeddings.
                resp.raise_for_status()
                embeddings = await resp.json()
        return embeddings

    async def ainvoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the input asynchronously, one request per batch.

        Returns:
            One `DocumentWithEmbedding` per input, whose content is the input's
            `content` value.
        """
        outputs = []
        for mini_batch in self._iter_batches(text):
            embeddings = await self.client_(mini_batch)  # type: ignore
            outputs.extend(
                DocumentWithEmbedding(content=doc, embedding=embedding)
                for doc, embedding in zip(mini_batch, embeddings)
            )

        return outputs

    def invoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the input synchronously, one request per batch.

        Returns:
            One `DocumentWithEmbedding` per input, whose content is the input's
            `content` value.

        Raises:
            requests.HTTPError: if the endpoint answers with an HTTP error status.
        """
        outputs = []
        for mini_batch in self._iter_batches(text):
            resp = session.post(
                url=self.endpoint_url,
                json=self._request_payload(mini_batch),
            )
            # Fail loudly: an error body must not be zipped as embeddings.
            resp.raise_for_status()
            embeddings = resp.json()
            outputs.extend(
                DocumentWithEmbedding(content=doc, embedding=embedding)
                for doc, embedding in zip(mini_batch, embeddings)
            )
        return outputs


================================================
FILE: libs/kotaemon/kotaemon/embeddings/voyageai.py
================================================
"""Implements embeddings from [Voyage AI](https://voyageai.com).
"""

import importlib

from kotaemon.base import Document, DocumentWithEmbedding, Param

from .base import BaseEmbeddings

vo = None


def _import_voyageai():
    """Return the `voyageai` module, importing it on first use.

    The imported module is cached in the module-level `vo` variable.
    """
    global vo
    if vo:
        return vo
    vo = importlib.import_module("voyageai")
    return vo


def _format_output(texts: list[str], embeddings: list[list]):
    """Pair each text with its embedding for all `.embed` calls.

    Args:
        texts: List of original documents
        embeddings: Embeddings corresponding to each document

    Returns:
        A list of `DocumentWithEmbedding`, one per (text, embedding) pair.
    """
    formatted = []
    for text, embedding in zip(texts, embeddings):
        formatted.append(DocumentWithEmbedding(content=text, embedding=embedding))
    return formatted


class VoyageAIEmbeddings(BaseEmbeddings):
    """Voyage AI provides best-in-class embedding models and rerankers."""

    api_key: str = Param(None, help="Voyage API key", required=False)
    model: str = Param(
        "voyage-3",
        help=(
            "Model name to use. The Voyage "
            "[documentation](https://docs.voyageai.com/docs/embeddings) "
            "provides a list of all available embedding models."
        ),
        required=True,
    )

    def __init__(self, *args, **kwargs):
        """Create the sync and async Voyage clients.

        Raises:
            ValueError: if no API key is set after initialisation.
        """
        super().__init__(*args, **kwargs)
        if not self.api_key:
            raise ValueError("API key must be provided for VoyageAIEmbeddings.")

        self._client = _import_voyageai().Client(api_key=self.api_key)
        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)

    def invoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the input with the synchronous Voyage client.

        Returns:
            One `DocumentWithEmbedding` per input, whose content is the input's
            `content` value.
        """
        texts = [t.content for t in self.prepare_input(text)]
        embeddings = self._client.embed(texts, model=self.model).embeddings
        return _format_output(texts, embeddings)

    async def ainvoke(
        self, text: str | list[str] | Document | list[Document], *args, **kwargs
    ) -> list[DocumentWithEmbedding]:
        """Embed the input with the asynchronous Voyage client.

        Returns:
            One `DocumentWithEmbedding` per input, whose content is the input's
            `content` value.
        """
        texts = [t.content for t in self.prepare_input(text)]
        # Await the call first: attribute access binds tighter than `await`, so
        # `.embeddings` must be read from the awaited result, not the coroutine.
        response = await self._aclient.embed(texts, model=self.model)
        return _format_output(texts, response.embeddings)


================================================
FILE: libs/kotaemon/kotaemon/indices/__init__.py
================================================
from .vectorindex import VectorIndexing, VectorRetrieval

__all__ = ["VectorIndexing", "VectorRetrieval"]


================================================
FILE: libs/kotaemon/kotaemon/indices/base.py
================================================
from __future__ import annotations

from abc import abstractmethod
from typing import Any, Type

from llama_index.core.node_parser.interface import NodeParser

from kotaemon.base import BaseComponent, Document, RetrievedDocument


class DocTransformer(BaseComponent):
    """This is a base class for document transformers

    A document transformer transforms a list of documents into another list
    of documents. Transforming can mean splitting a document into multiple documents,
    reducing a large list of documents into a smaller list of documents, or adding
    metadata to each document in a list of documents, etc.
    """

    @abstractmethod
    def run(
        self,
        documents: list[Document],
        **kwargs,
    ) -> list[Document]:
        """Transform `documents` into another list of documents.

        Abstract: concrete transformers provide the implementation.
        """
        ...


class LlamaIndexDocTransformerMixin:
    """Allow automatically wrapping a Llama-index component into kotaemon component

    Example:
        class TokenSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
            def _get_li_class(self):
                from llama_index.core.text_splitter import TokenTextSplitter
                return TokenTextSplitter

    To use this mixin, please:
        1. Use this class as the 1st parent class, so that Python will prefer to use
        the attributes and methods of this class whenever possible.
        2. Overwrite `_get_li_class` to return the relevant LlamaIndex component.
    """

    # Attributes created by `__init__`. `__getattr__` must never try to resolve
    # them through `self._kwargs` / `self._obj`, otherwise looking up any missing
    # attribute on a not-yet-initialised instance recurses without bound.
    _LI_INTERNAL_ATTRS = ("_li_cls", "_obj", "_kwargs")

    def _get_li_class(self) -> Type[NodeParser]:
        """Return the LlamaIndex class to wrap (must be overridden)."""
        raise NotImplementedError(
            "Please return the relevant LlamaIndex class in _get_li_class"
        )

    def __init__(self, **params):
        """Instantiate the wrapped LlamaIndex object from `params`."""
        self._li_cls = self._get_li_class()
        self._obj = self._li_cls(**params)
        self._kwargs = params
        super().__init__()

    def __repr__(self):
        """Return `ClassName(key=repr(value), ...)` built from the stored kwargs."""
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = repr(value_obj)
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __str__(self):
        """Like `__repr__`, but uses `str` and shortens values over 20 characters."""
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = str(value_obj)
            if len(value) > 20:
                value = f"{value[:15]}..."
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __setattr__(self, name: str, value: Any) -> None:
        """Store private/protected names normally; mirror the rest onto `_obj`."""
        if name.startswith("_") or name in self._protected_keywords():
            return super().__setattr__(name, value)

        # Any other attribute is treated as a parameter of the wrapped object.
        self._kwargs[name] = value
        return setattr(self._obj, name, value)

    def __getattr__(self, name: str) -> Any:
        """Resolve missing attributes from the stored kwargs, then the wrapped object.

        Raises:
            AttributeError: if `name` is one of the mixin's own internal
                attributes and has not been set yet.
        """
        if name in self._LI_INTERNAL_ATTRS:
            # Reached only when the attribute is genuinely absent (for example
            # on an instance created with `__new__`); fail fast instead of
            # recursing through `self._kwargs` / `self._obj`.
            raise AttributeError(name)
        if name in self._kwargs:
            return self._kwargs[name]
        return getattr(self._obj, name)

    def dump(self, *args, **kwargs):
        """Serialize the component as its dotted type path plus the stored kwargs."""
        from theflow.utils.modules import serialize

        params = {key: serialize(value) for key, value in self._kwargs.items()}
        return {
            "__type__": f"{self.__module__}.{self.__class__.__qualname__}",
            **params,
        }

    def run(
        self,
        documents: list[Document],
        **kwargs,
    ) -> list[Document]:
        """Run Llama-index node parser and convert the output to Document from
        kotaemon
        """
        docs = self._obj(documents, **kwargs)  # type: ignore
        return [Document.from_dict(doc.to_dict()) for doc in docs]


class BaseIndexing(BaseComponent):
    """Define the base interface for indexing pipeline

    Both conversion methods raise `NotImplementedError` here; a subclass
    overrides the ones it supports.
    """

    def to_retrieval_pipeline(self, **kwargs):
        """Convert the indexing pipeline to a retrieval pipeline"""
        raise NotImplementedError

    def to_qa_pipeline(self, **kwargs):
        """Convert the indexing pipeline to a QA pipeline"""
        raise NotImplementedError


class BaseRetrieval(BaseComponent):
    """Define the base interface for retrieval pipeline"""

    @abstractmethod
    def run(self, *args, **kwargs) -> list[RetrievedDocument]:
        """Return the retrieved documents.

        Abstract: concrete retrieval pipelines provide the implementation.
        """
        ...


================================================
FILE: libs/kotaemon/kotaemon/indices/extractors/__init__.py
================================================
from .doc_parsers import BaseDocParser, SummaryExtractor, TitleExtractor

__all__ = [
    "BaseDocParser",
    "TitleExtractor",
    "SummaryExtractor",
]


================================================
FILE: libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py
================================================
from ..base import DocTransformer, LlamaIndexDocTransformerMixin


class BaseDocParser(DocTransformer):
    """Base class for document parsers; adds nothing to `DocTransformer`."""

    ...


class TitleExtractor(LlamaIndexDocTransformerMixin, BaseDocParser):
    """Wrapper around LlamaIndex's `TitleExtractor`."""

    def __init__(
        self,
        llm=None,
        nodes: int = 5,
        **params,
    ):
        """Forward `llm`, `nodes` and any extra `params` to the mixin."""
        li_params = dict(llm=llm, nodes=nodes)
        li_params.update(params)
        super().__init__(**li_params)

    def _get_li_class(self):
        """Return the LlamaIndex `TitleExtractor` class."""
        from llama_index.core.extractors import TitleExtractor as li_cls

        return li_cls


class SummaryExtractor(LlamaIndexDocTransformerMixin, BaseDocParser):
    """Wrapper around LlamaIndex's `SummaryExtractor`."""

    def __init__(
        self,
        llm=None,
        summaries: list[str] | None = None,
        **params,
    ):
        """Forward `llm`, `summaries` and any extra `params` to the mixin.

        Args:
            llm: passed through to the LlamaIndex extractor.
            summaries: passed through to the LlamaIndex extractor; defaults to
                `["self"]` when omitted.
        """
        # Build the default per call: the list is stored on the instance, so a
        # shared default object would leak mutations between instances.
        if summaries is None:
            summaries = ["self"]
        super().__init__(llm=llm, summaries=summaries, **params)

    def _get_li_class(self):
        """Return the LlamaIndex `SummaryExtractor` class."""
        from llama_index.core.extractors import SummaryExtractor

        return SummaryExtractor


================================================
FILE: libs/kotaemon/kotaemon/indices/ingests/__init__.py
================================================
from .files import DocumentIngestor

__all__ = ["DocumentIngestor"]


================================================
FILE: libs/kotaemon/kotaemon/indices/ingests/files.py
================================================
from pathlib import Path
from typing import Type

from decouple import config
from llama_index.core.readers.base import BaseReader
from llama_index.readers.file import PDFReader
from theflow.settings import settings as flowsettings

from kotaemon.base import BaseComponent, Document, Param
from kotaemon.indices.extractors import BaseDocParser
from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
from kotaemon.loaders import (
    AdobeReader,
    AzureAIDocumentIntelligenceLoader,
    DirectoryReader,
    DoclingReader,
    HtmlReader,
    MathpixPDFReader,
    MhtmlReader,
    OCRReader,
    PandasExcelReader,
    PDFThumbnailReader,
    TxtReader,
    UnstructuredReader,
    WebReader,
)

# Reader instances created once at import time.
web_reader = WebReader()
unstructured = UnstructuredReader()
adobe_reader = AdobeReader()
# Endpoint and credential are read through `config` (empty string when unset);
# the cache directory comes from `KH_MARKDOWN_OUTPUT_DIR` in the flow settings,
# or None when that setting is absent.
azure_reader = AzureAIDocumentIntelligenceLoader(
    endpoint=str(config("AZURE_DI_ENDPOINT", default="")),
    credential=str(config("AZURE_DI_CREDENTIAL", default="")),
    cache_dir=getattr(flowsettings, "KH_MARKDOWN_OUTPUT_DIR", None),
)
docling_reader = DoclingReader()
# Give the Adobe, Azure and Docling readers the same `vlm_endpoint`, taken from
# `KH_VLM_ENDPOINT` in the flow settings (empty string when absent).
adobe_reader.vlm_endpoint = (
    azure_reader.vlm_endpoint
) = docling_reader.vlm_endpoint = getattr(flowsettings, "KH_VLM_ENDPOINT", "")


# Default mapping from file extension to reader instance.
# `DocumentIngestor._get_reader` copies this mapping before applying overrides.
# Every extension mapped to `unstructured` shares that single reader instance.
KH_DEFAULT_FILE_EXTRACTORS: dict[str, BaseReader] = {
    ".xlsx": PandasExcelReader(),
    ".docx": unstructured,
    ".pptx": unstructured,
    ".xls": unstructured,
    ".doc": unstructured,
    ".html": HtmlReader(),
    ".mhtml": MhtmlReader(),
    ".png": unstructured,
    ".jpeg": unstructured,
    ".jpg": unstructured,
    ".tiff": unstructured,
    ".tif": unstructured,
    ".pdf": PDFThumbnailReader(),
    ".txt": TxtReader(),
    ".md": TxtReader(),
}


class DocumentIngestor(BaseComponent):
    """Ingest common office document types into Document for indexing

    Document types:
        - pdf
        - xlsx, xls
        - docx, doc

    Args:
        pdf_mode: mode for pdf extraction, one of "normal", "ocr", "multimodal",
            "mathpix"
            - normal: parse pdf text
            - ocr: parse pdf image using flax
            - multimodal: parse pdf with `AdobeReader`
            - mathpix: parse pdf text using mathpix (this is also what any other,
                unrecognized value falls back to)
        doc_parsers: list of document parsers to parse the document
        text_splitter: splitter to split the document into text nodes
        override_file_extractors: override file extractors for specific file extensions
            The default file extractors are stored in `KH_DEFAULT_FILE_EXTRACTORS`.
            Values are reader classes (instantiated without arguments); an entry
            here wins over both the defaults and the reader chosen by `pdf_mode`.
    """

    pdf_mode: str = "normal"  # "normal", "mathpix", "ocr", "multimodal"
    doc_parsers: list[BaseDocParser] = Param(default_callback=lambda _: [])
    text_splitter: BaseSplitter = TokenSplitter.withx(
        chunk_size=1024,
        chunk_overlap=256,
        separator="\n\n",
        backup_separators=["\n", ".", " ", "\u200B"],
    )
    override_file_extractors: dict[str, Type[BaseReader]] = {}

    def _get_reader(self, input_files: list[str | Path]):
        """Get appropriate readers for the input files based on file extension

        Args:
            input_files: the files that the returned reader will load

        Returns:
            a `DirectoryReader` configured with the per-extension readers
        """
        # start from a shallow copy so the module-level defaults are never mutated
        file_extractors: dict[str, BaseReader] = dict(KH_DEFAULT_FILE_EXTRACTORS)

        # pick the pdf reader requested by `pdf_mode`
        if self.pdf_mode == "normal":
            file_extractors[".pdf"] = PDFReader()
        elif self.pdf_mode == "ocr":
            file_extractors[".pdf"] = OCRReader()
        elif self.pdf_mode == "multimodal":
            file_extractors[".pdf"] = AdobeReader()
        else:
            file_extractors[".pdf"] = MathpixPDFReader()

        # Apply the explicit overrides last. They used to be applied before the
        # `pdf_mode` branch above, which silently discarded any ".pdf" override.
        for ext, cls in self.override_file_extractors.items():
            file_extractors[ext] = cls()

        main_reader = DirectoryReader(
            input_files=input_files,
            file_extractor=file_extractors,
        )

        return main_reader

    def run(self, file_paths: list[str | Path] | str | Path) -> list[Document]:
        """Ingest the file paths into Document

        Args:
            file_paths: list of file paths or a single file path

        Returns:
            list of parsed Documents
        """
        if not isinstance(file_paths, list):
            file_paths = [file_paths]

        # read -> split -> (optional) post-process with the document parsers
        documents = self._get_reader(input_files=file_paths)()
        print(f"Read {len(file_paths)} files into {len(documents)} documents.")
        nodes = self.text_splitter(documents)
        print(f"Transform {len(documents)} documents into {len(nodes)} nodes.")
        self.log_progress(".num_docs", num_docs=len(nodes))

        # document parsers call; each parser receives the previous parser's output
        if self.doc_parsers:
            for parser in self.doc_parsers:
                nodes = parser(nodes)

        return nodes


================================================
FILE: libs/kotaemon/kotaemon/indices/qa/__init__.py
================================================
from .citation import CitationPipeline

__all__ = [
    "CitationPipeline",
]


================================================
FILE: libs/kotaemon/kotaemon/indices/qa/citation.py
================================================
from typing import List

from pydantic import BaseModel, Field

from kotaemon.base import BaseComponent
from kotaemon.base.schema import HumanMessage, SystemMessage
from kotaemon.llms import BaseLLM


# NOTE: the class docstring and the field description below are runtime text,
# not just documentation. `CitationPipeline.prepare_llm` builds the tool
# definition sent to the LLM from `CiteEvidence.schema()` (its "title",
# "description" and the schema itself), and pydantic derives that description
# from the docstring. Editing either string changes the prompt.
class CiteEvidence(BaseModel):
    """List of evidences (maximum 5) to support the answer."""

    # quotes copied from the context; required field (`...` means no default)
    evidences: List[str] = Field(
        ...,
        description=(
            "Each source should be a direct quote from the context, "
            "as a substring of the original content (max 15 words)."
        ),
    )


class CitationPipeline(BaseComponent):
    """Ask an LLM, through a forced tool call, for the quotes in a context
    that support the answer to a question."""

    llm: BaseLLM

    def run(self, context: str, question: str):
        """Entry point of the component; delegates to `invoke`."""
        return self.invoke(context, question)

    def prepare_llm(self, context: str, question: str):
        """Build the chat messages and the tool-calling kwargs for the LLM

        Args:
            context: the text the citations must be quoted from
            question: the question being answered

        Returns:
            a `(messages, llm_kwargs)` tuple
        """
        cite_schema = CiteEvidence.schema()
        tool_spec = {
            "type": "function",
            "function": {
                "name": cite_schema["title"],
                "description": cite_schema["description"],
                "parameters": cite_schema,
            },
        }

        system_msg = SystemMessage(
            content=(
                "You are a world class algorithm to answer "
                "questions with correct and exact citations."
            )
        )
        instruction_msg = HumanMessage(
            content=(
                "Answer question using the following context. "
                "Use the provided function CiteEvidence() to cite your sources."
            )
        )
        tips_msg = HumanMessage(
            content=(
                "Tips: Make sure to cite your sources, "
                "and use the exact words from the context."
            )
        )

        messages = [
            system_msg,
            instruction_msg,
            HumanMessage(content=context),
            HumanMessage(content=f"Question: {question}"),
            tips_msg,
        ]
        llm_kwargs = {
            "tools": [tool_spec],
            "tool_choice": "required",
            "tools_pydantic": [CiteEvidence],
        }
        return messages, llm_kwargs

    def invoke(self, context: str, question: str):
        """Call the LLM and parse its first tool call into a `CiteEvidence`

        Returns:
            the parsed `CiteEvidence`, or None when the LLM made no tool call or
            anything went wrong (the error is printed, never raised)
        """
        messages, llm_kwargs = self.prepare_llm(context, question)
        try:
            print("CitationPipeline: invoking LLM")
            response = self.get_from_path("llm").invoke(messages, **llm_kwargs)
            print("CitationPipeline: finish invoking LLM")

            tool_calls = response.additional_kwargs.get("tool_calls")
            if not tool_calls:
                return None

            first_call = tool_calls[0]
            if "function" in first_call:
                # openai and cohere format
                payload = first_call["function"]["arguments"]
            else:
                # anthropic format
                payload = first_call["args"]

            print("CitationPipeline:", payload)

            # the arguments arrive either as a JSON string or as a parsed object
            if isinstance(payload, str):
                return CiteEvidence.parse_raw(payload)
            return CiteEvidence.parse_obj(payload)
        except Exception as e:
            # best effort: a citation failure must not break answering
            print(e)
            return None

    async def ainvoke(self, context: str, question: str):
        """Async variant; not implemented."""
        raise NotImplementedError()


================================================
FILE: libs/kotaemon/kotaemon/indices/qa/citation_qa.py
================================================
import threading
from collections import defaultdict
from typing import Generator

import numpy as np
from decouple import config
from theflow.settings import settings as flowsettings

from kotaemon.base import (
    AIMessage,
    BaseComponent,
    Document,
    HumanMessage,
    Node,
    SystemMessage,
)
from kotaemon.llms import ChatLLM, PromptTemplate

from .citation import CitationPipeline
from .format_context import (
    EVIDENCE_MODE_FIGURE,
    EVIDENCE_MODE_TABLE,
    EVIDENCE_MODE_TEXT,
)
from .utils import find_text

try:
    from ktem.llms.manager import llms
    from ktem.reasoning.prompt_optimization.mindmap import CreateMindmapPipeline
    from ktem.utils.render import Render
except ImportError:
    raise ImportError("Please install `ktem` to use this component")

# Maximum number of images attached to the multimodal prompt in `stream`.
MAX_IMAGES = 10
# Seconds to wait when joining the citation / mindmap worker threads.
CITATION_TIMEOUT = 5.0
# Threshold compared against a document's `llm_trulens_score` in
# `prepare_citations`: uncited documents are shown expanded only when their
# score is above it. Read through `decouple.config`, default 0.3.
CONTEXT_RELEVANT_WARNING_SCORE = config(
    "CONTEXT_RELEVANT_WARNING_SCORE", 0.3, cast=float
)

# Default prompt for plain-text evidence.
# Placeholders: {lang}, {context}, {question}.
DEFAULT_QA_TEXT_PROMPT = "\n".join(
    [
        (
            "Use the following pieces of context to answer the question at the end "
            "in detail with clear explanation. "
            "If you don't know the answer, just say that you don't know, "
            "don't try to make up an answer. "
            "Give answer in {lang}."
        ),
        "",
        "{context}",
        "Question: {question}",
        "Helpful Answer:",
    ]
)

# Default prompt for table evidence.
# Placeholders: {lang}, {context}, {question}.
DEFAULT_QA_TABLE_PROMPT = (
    "Use the given context: texts, tables, and figures below to answer the question, "
    # the trailing space matters: without it the two sentences are glued
    # together as "explanation.If you don't know"
    "then provide answer with clear explanation. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. Give answer in {lang}.\n\n"
    "Context:\n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)  # noqa

# Default prompt for "chatbot scenario" evidence.
# Placeholders: {lang}, {context}, {question}.
DEFAULT_QA_CHATBOT_PROMPT = "\n".join(
    [
        (
            "Pick the most suitable chatbot scenarios to answer the question "
            "at the end, output the provided answer text. "
            "If you don't know the answer, just say that you don't know. "
            "Keep the answer as concise as possible. "
            "Give answer in {lang}."
        ),
        "",
        "Context:",
        "{context}",
        "Question: {question}",
        "Answer:",
    ]
)  # noqa

# Default prompt for figure evidence (used when multimodal answering is on).
# Placeholders: {lang}, {context}, {question}. The trailing spaces after
# "Context:" and "Answer:" are part of the prompt.
DEFAULT_QA_FIGURE_PROMPT = "\n".join(
    [
        (
            "Use the given context: texts, tables, and figures below "
            "to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Give answer in {lang}."
        ),
        "",
        "Context: ",
        "{context}",
        "Question: {question}",
        "Answer: ",
    ]
)  # noqa


class AnswerWithContextPipeline(BaseComponent):
    """Answer the question based on the evidence

    Args:
        llm: the language model to generate the answer
        citation_pipeline: generates citation from the evidence
        create_mindmap_pipeline: generates a mindmap from the evidence
        qa_template: the prompt template for LLM to generate answer (refer to
            evidence_mode)
        qa_table_template: the prompt template for LLM to generate answer for table
            (refer to evidence_mode)
        qa_chatbot_template: the prompt template for LLM to generate answer for
            pre-made scenarios (refer to evidence_mode)
        qa_figure_template: the prompt template for LLM to generate answer for
            figures; only used when `use_multimodal` is on
        enable_citation: run `citation_pipeline` in a background thread
        enable_mindmap: run `create_mindmap_pipeline` in a background thread
        enable_citation_viz: flag copied into the answer metadata ("citation_viz")
        system_prompt: optional system message prepended to the conversation
        lang: the language of the answer. Currently support English and Japanese
        n_last_interactions: number of most recent (human, ai) history pairs
            replayed to the LLM
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
    use_multimodal: bool = getattr(flowsettings, "KH_REASONINGS_USE_MULTIMODAL", True)
    citation_pipeline: CitationPipeline = Node(
        default_callback=lambda _: CitationPipeline(llm=llms.get_default())
    )
    create_mindmap_pipeline: CreateMindmapPipeline = Node(
        default_callback=lambda _: CreateMindmapPipeline(llm=llms.get_default())
    )

    qa_template: str = DEFAULT_QA_TEXT_PROMPT
    qa_table_template: str = DEFAULT_QA_TABLE_PROMPT
    qa_chatbot_template: str = DEFAULT_QA_CHATBOT_PROMPT
    qa_figure_template: str = DEFAULT_QA_FIGURE_PROMPT

    enable_citation: bool = False
    enable_mindmap: bool = False
    enable_citation_viz: bool = False

    system_prompt: str = ""
    lang: str = "English"  # support English and Japanese
    n_last_interactions: int = 5

    def get_prompt(self, question, evidence, evidence_mode: int):
        """Prepare the prompt and other information for LLM

        Args:
            question: the user question
            evidence: the evidence text inserted as the prompt context
            evidence_mode: one of the `EVIDENCE_MODE_*` values; any value other
                than text / table / figure selects the chatbot template

        Returns:
            a `(prompt, evidence)` tuple; the evidence is returned unchanged
        """
        if evidence_mode == EVIDENCE_MODE_TEXT:
            prompt_template = PromptTemplate(self.qa_template)
        elif evidence_mode == EVIDENCE_MODE_TABLE:
            prompt_template = PromptTemplate(self.qa_table_template)
        elif evidence_mode == EVIDENCE_MODE_FIGURE:
            # without multimodal support, figures are answered with the text prompt
            if self.use_multimodal:
                prompt_template = PromptTemplate(self.qa_figure_template)
            else:
                prompt_template = PromptTemplate(self.qa_template)
        else:
            prompt_template = PromptTemplate(self.qa_chatbot_template)

        prompt = prompt_template.populate(
            context=evidence,
            question=question,
            lang=self.lang,
        )

        return prompt, evidence

    def run(
        self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
    ) -> Document:
        """Entry point of the component; delegates to `invoke`."""
        return self.invoke(question, evidence, evidence_mode, **kwargs)

    def invoke(
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Document:
        """Blocking variant; not implemented, use `stream` instead."""
        raise NotImplementedError

    async def ainvoke(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Document:
        """Answer the question based on the evidence

        In addition to the question and the evidence, this method also take into
        account evidence_mode. The evidence_mode tells which kind of evidence is.
        The kind of evidence affects:
            1. How the evidence is represented.
            2. The prompt to generate the answer.

        By default, the evidence_mode is 0, which means the evidence is plain text with
        no particular semantic representation. The evidence_mode can be:
            1. "table": There will be HTML markup telling that there is a table
                within the evidence.
            2. "chatbot": There will be HTML markup telling that there is a chatbot.
                This chatbot is a scenario, extracted from an Excel file, where each
                row corresponds to an interaction.

        Args:
            question: the original question posed by user
            evidence: the text that contain relevant information to answer the question
                (determined by retrieval pipeline)
            evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot

        Raises:
            NotImplementedError: always; the async variant is not implemented
        """
        raise NotImplementedError

    def stream(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Generator[Document, None, Document]:
        """Stream the answer and collect citation / mindmap in the background

        Args:
            question: the user question
            evidence: the evidence text; when empty the question itself is sent
                as the prompt and no citation / mindmap is computed
            evidence_mode: one of the `EVIDENCE_MODE_*` values
            images: image URLs attached to the prompt in multimodal figure mode
                (at most `MAX_IMAGES` are used)
            **kwargs: `history` (list of `(human, ai)` pairs) is the only key read

        Yields:
            `Document(channel="chat")` chunks of the answer as they arrive

        Returns:
            the final `Document` whose metadata holds "citation_viz", "mindmap",
            "citation" and "qa_score"
        """
        history = kwargs.get("history", [])
        print(f"Got {len(images)} images")
        # check if evidence exists, use QA prompt
        if evidence:
            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
        else:
            prompt = question

        # retrieve the citation
        citation = None
        mindmap = None

        def citation_call():
            nonlocal citation
            citation = self.citation_pipeline(context=evidence, question=question)

        def mindmap_call():
            nonlocal mindmap
            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)

        citation_thread = None
        mindmap_thread = None

        # execute function call in thread, so they overlap with the answer streaming
        if evidence:
            if self.enable_citation:
                citation_thread = threading.Thread(target=citation_call)
                citation_thread.start()

            if self.enable_mindmap:
                mindmap_thread = threading.Thread(target=mindmap_call)
                mindmap_thread.start()

        output = ""
        logprobs = []

        messages = []
        if self.system_prompt:
            messages.append(SystemMessage(content=self.system_prompt))

        # replay only the most recent interactions
        for human, ai in history[-self.n_last_interactions :]:
            messages.append(HumanMessage(content=human))
            messages.append(AIMessage(content=ai))

        if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
            # create image message:
            messages.append(
                HumanMessage(
                    content=[
                        {"type": "text", "text": prompt},
                    ]
                    + [
                        {
                            "type": "image_url",
                            "image_url": {"url": image},
                        }
                        for image in images[:MAX_IMAGES]
                    ],
                )
            )
        else:
            # append main prompt
            messages.append(HumanMessage(content=prompt))

        try:
            # try streaming first
            print("Trying LLM streaming")
            for out_msg in self.llm.stream(messages):
                output += out_msg.text
                logprobs += out_msg.logprobs
                yield Document(channel="chat", content=out_msg.text)
        except NotImplementedError:
            print("Streaming is not supported, falling back to normal processing")
            output = self.llm(messages).text
            yield Document(channel="chat", content=output)

        # confidence score: exp of the mean token log-probability, when available
        if logprobs:
            qa_score = np.exp(np.average(logprobs))
        else:
            qa_score = None

        # wait (bounded) for the workers; a worker that has not finished in time
        # leaves its result as None in the metadata below
        if citation_thread:
            citation_thread.join(timeout=CITATION_TIMEOUT)
        if mindmap_thread:
            mindmap_thread.join(timeout=CITATION_TIMEOUT)

        answer = Document(
            text=output,
            metadata={
                "citation_viz": self.enable_citation_viz,
                "mindmap": mindmap,
                "citation": citation,
                "qa_score": qa_score,
            },
        )

        return answer

    def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:
        """Match the evidence with the context

        Args:
            answer: the answer Document; `answer.metadata["citation"].evidences`
                holds the quotes to locate
            docs: the documents to search in

        Returns:
            mapping from `doc_id` to a list of `{"start", "end"}` character spans
        """
        spans: dict[str, list[dict]] = defaultdict(list)

        if not answer.metadata["citation"]:
            return spans

        evidences = answer.metadata["citation"].evidences
        for quote in evidences:
            for doc in docs:
                matches = find_text(quote, doc.text)

                for start, end in matches:
                    # matches containing "|" are skipped
                    # (presumably table cell separators -- TODO confirm)
                    if "|" not in doc.text[start:end]:
                        spans[doc.doc_id].append(
                            {
                                "start": start,
                                "end": end,
                            }
                        )

        return spans

    def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
        """Prepare the citations to show on the UI

        Args:
            answer: the answer Document produced by `stream`
            docs: the retrieved documents used as evidence

        Returns:
            a `(with_citation, without_citation)` tuple of "info" Documents:
            documents with highlighted cited spans, and the remaining documents
            ordered by descending `llm_trulens_score`
        """
        with_citation, without_citation = [], []
        has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)

        spans = self.match_evidence_with_context(answer, docs)
        id2docs = {doc.doc_id: doc for doc in docs}
        not_detected = set(id2docs.keys()) - set(spans.keys())

        # render highlight spans
        for _id, ss in spans.items():
            if not ss:
                not_detected.add(_id)
                continue
            cur_doc = id2docs[_id]
            highlight_text = ""

            ss = sorted(ss, key=lambda x: x["start"])
            last_end = 0
            text = cur_doc.text[: ss[0]["start"]]

            for idx, span in enumerate(ss):
                # prevent overlapping between span: clamp to what was already
                # emitted, so a span nested in a previous one becomes empty
                span_start = max(last_end, span["start"])
                span_end = max(last_end, span["end"])

                to_highlight = cur_doc.text[span_start:span_end]
                last_end = span_end

                # append to highlight on PDF viewer (skip spans emptied by clamping)
                if to_highlight:
                    highlight_text += (" " if highlight_text else "") + to_highlight

                span_idx = span.get("idx", None)
                if span_idx is not None:
                    to_highlight = f"【{span_idx}】" + to_highlight

                text += Render.highlight(
                    to_highlight,
                    elem_id=str(span_idx) if span_idx is not None else None,
                )
                if idx < len(ss) - 1:
                    # Continue from `last_end`, not `span["end"]`: for a nested
                    # span `span["end"]` lies before text that is already
                    # rendered, and slicing from it would duplicate that text.
                    text += cur_doc.text[last_end : ss[idx + 1]["start"]]

            # same reasoning for the tail after the last span
            text += cur_doc.text[last_end:]
            # add to display list
            with_citation.append(
                Document(
                    channel="info",
                    content=Render.collapsible_with_header_score(
                        cur_doc,
                        override_text=text,
                        highlight_text=highlight_text,
                        open_collapsible=True,
                    ),
                )
            )

        print("Got {} cited docs".format(len(with_citation)))

        sorted_not_detected_items_with_scores = [
            (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
            for id_ in not_detected
        ]
        sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)

        for id_, doc_score in sorted_not_detected_items_with_scores:
            doc = id2docs[id_]
            # without any LLM relevance score every document is shown expanded;
            # otherwise only those scoring above the warning threshold
            is_open = not has_llm_score or (
                doc_score
                > CONTEXT_RELEVANT_WARNING_SCORE
                # and len(with_citation) == 0
            )
            without_citation.append(
                Document(
                    channel="info",
                    content=Render.collapsible_with_header_score(
                        doc, open_collapsible=is_open
                    ),
                )
            )
        return with_citation, without_citation


================================================
FILE: libs/kotaemon/kotaemon/indices/qa/citation_qa_inline.py
================================================
import re
import threading
from collections import defaultdict
from dataclasses import dataclass
from typing import Generator

import numpy as np

from kotaemon.base import AIMessage, Document, HumanMessage, SystemMessage
from kotaemon.llms import PromptTemplate

from .citation_qa import CITATION_TIMEOUT, MAX_IMAGES, AnswerWithContextPipeline
from .format_context import EVIDENCE_MODE_FIGURE
from .utils import find_start_end_phrase

# Prompt asking the LLM for a "CITATION LIST" section followed by a
# "FINAL ANSWER" section; placeholders: {context}, {question}.
# The section names and the START_PHRASE / END_PHRASE / CITATION【n】 markers
# are parsed back from the LLM output, so keep them in sync with the
# START_* / *_PATTERN constants below.
DEFAULT_QA_CITATION_PROMPT = """
Use the following pieces of context to answer the question at the end.
Provide DETAILED answer with clear explanation.
Format answer with easy to follow bullets / paragraphs.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use the same language as the question to response.

CONTEXT:
----
{context}
----

Answer using this format:
CITATION LIST

// the index in this array
CITATION【number】

// output 2 phrase to mark start and end of the relevant span
// each has ~ 6 words
// MUST COPY EXACTLY from the CONTEXT
// NO CHANGE or REPHRASE
// RELEVANT_SPAN_FROM_CONTEXT
START_PHRASE: string
END_PHRASE: string

// When you answer, ensure to add citations from the documents
// in the CONTEXT with a number that corresponds to the answersInText array.
// (in the form [number])
// Try to include the number after each facts / statements you make.
// You can create as many citations as you need.
FINAL ANSWER
string

STRICTLY FOLLOW THIS EXAMPLE:
CITATION LIST

CITATION【1】

START_PHRASE: Known as fixed-size chunking , the traditional
END_PHRASE: not degrade the final retrieval performance.

CITATION【2】

START_PHRASE: Fixed-size Chunker This is our baseline chunker
END_PHRASE: this shows good retrieval quality.

FINAL ANSWER
An alternative to semantic chunking is fixed-size chunking. This traditional method involves splitting documents into chunks of a predetermined or user-specified size, regardless of semantic content, which is computationally efficient【1】. However, it may result in the fragmentation of semantically related content, thereby potentially degrading retrieval performance【1】【2】.

QUESTION: {question}\n
ANSWER:
"""  # noqa

# Section markers the prompt asks the LLM to emit; `stream` looks for them in
# the raw output to separate the citation list from the answer body.
START_ANSWER = "FINAL ANSWER"
START_CITATION = "CITATION LIST"
# The three patterns below are lower-case because `answer_to_citations`
# matches them against lower-cased lines.
CITATION_PATTERN = r"citation【(\d+)】"
START_ANSWER_PATTERN = "start_phrase:"
END_ANSWER_PATTERN = "end_phrase:"


@dataclass
class InlineEvidence:
    """A single inline citation parsed from the LLM output.

    Every field defaults to None, so a partially parsed citation (for example
    one with an index but no phrases) can still be represented.
    """

    # text following "START_PHRASE:" -- the beginning of the cited span
    start_phrase: str | None = None
    # text following "END_PHRASE:" -- the end of the cited span
    end_phrase: str | None = None
    # number parsed from the "CITATION【n】" line, None when it was absent
    idx: int | None = None


class AnswerWithInlineCitation(AnswerWithContextPipeline):
    """Answer the question based on the evidence with inline citation

    The LLM is asked to output a "CITATION LIST" section (numbered start / end
    phrases copied from the context) followed by a "FINAL ANSWER" section that
    refers to those numbers. Only the final answer is shown in the chat; the
    citation list is parsed into `InlineEvidence` objects.
    """

    qa_citation_template: str = DEFAULT_QA_CITATION_PROMPT

    def get_prompt(self, question, evidence, evidence_mode: int):
        """Prepare the prompt and other information for LLM

        The citation template is used for every `evidence_mode`.

        Returns:
            a `(prompt, evidence)` tuple; the evidence is returned unchanged
        """
        prompt_template = PromptTemplate(self.qa_citation_template)

        prompt = prompt_template.populate(
            context=evidence,
            question=question,
            safe=False,
        )

        return prompt, evidence

    def answer_to_citations(self, answer) -> list[InlineEvidence]:
        """Parse the raw LLM output into a list of `InlineEvidence`

        The output is scanned line by line for "CITATION【n】", "START_PHRASE:"
        and "END_PHRASE:" markers (case-insensitive, surrounding whitespace
        ignored). An evidence is closed as soon as it has both phrases, or when
        the next citation marker starts.

        Args:
            answer: the raw text produced by the LLM

        Returns:
            the parsed evidences, in order of appearance
        """
        citations: list[InlineEvidence] = []

        current_evidence = None

        for raw_line in answer.split("\n"):
            # tolerate indentation and stray "\r" around the markers
            line = raw_line.strip()
            lowered = line.lower()

            # check citation idx using regex
            match = re.match(CITATION_PATTERN, lowered)

            if match:
                try:
                    parsed_citation_idx = int(match.group(1))
                except ValueError:
                    parsed_citation_idx = None

                # conclude the current evidence if exists
                if current_evidence:
                    citations.append(current_evidence)
                    current_evidence = None

                current_evidence = InlineEvidence(idx=parsed_citation_idx)
            else:
                for keyword in [START_ANSWER_PATTERN, END_ANSWER_PATTERN]:
                    if lowered.startswith(keyword):
                        matched_phrase = line[len(keyword) :].strip()
                        # a phrase without a preceding citation marker still
                        # opens an (unnumbered) evidence
                        if not current_evidence:
                            current_evidence = InlineEvidence(idx=None)

                        if keyword == START_ANSWER_PATTERN:
                            current_evidence.start_phrase = matched_phrase
                        else:
                            current_evidence.end_phrase = matched_phrase

                        break

            if (
                current_evidence
                and current_evidence.end_phrase
                and current_evidence.start_phrase
            ):
                citations.append(current_evidence)
                current_evidence = None

        if current_evidence:
            citations.append(current_evidence)

        return citations

    def replace_citation_with_link(self, answer: str):
        """Turn citation markers in the answer into HTML anchor links

        Merged markers such as "【1, 2】" are first split into "【1】【2】". Then
        every "【n】" marker (or, when none is present, every "[n]" marker) is
        replaced by an `<a class='citation' id='mark-n'>` element, and the
        "CITATION LIST" header is removed.

        Args:
            answer: the answer text

        Returns:
            the answer with citation links
        """
        # Define the regex pattern to match 【number】
        pattern = r"【\d+】"
        alternate_pattern = r"\[\d+\]"

        # Regular expression to match merged citations
        multi_pattern = r"【([\d,\s]+)】"

        # Function to replace merged citations with independent ones
        def split_citations(match):
            # Extract the numbers, split by comma, and create individual
            # citations; empty pieces (e.g. from a trailing comma in "【1,】")
            # are dropped instead of producing an empty "【】" marker
            numbers = (num.strip() for num in match.group(1).split(","))
            return "".join(f"【{num}】" for num in numbers if num)

        # Replace merged citations in the text
        answer = re.sub(multi_pattern, split_citations, answer)

        # Find all citations in the answer
        matches = list(re.finditer(pattern, answer))
        if not matches:
            matches = list(re.finditer(alternate_pattern, answer))

        matched_citations = set()
        for match in matches:
            citation = match.group()
            matched_citations.add(citation)

        for citation in matched_citations:
            # drop the surrounding brackets, whichever style was matched
            citation_id = citation[1:-1]
            answer = answer.replace(
                citation,
                (
                    "<a href='#' class='citation' "
                    f"id='mark-{citation_id}'>【{citation_id}】</a>"
                ),
            )

        answer = answer.replace(START_CITATION, "")

        return answer

    def stream(  # type: ignore
        self,
        question: str,
        evidence: str,
        evidence_mode: int = 0,
        images: list[str] = [],
        **kwargs,
    ) -> Generator[Document, None, Document]:
        """Stream the final answer and parse the inline citations

        Args:
            question: the user question
            evidence: the evidence text; when empty the question itself is sent
                as the prompt and the LLM output is streamed as-is
            evidence_mode: one of the `EVIDENCE_MODE_*` values
            images: image URLs attached to the prompt in multimodal figure mode
                (at most `MAX_IMAGES` are used)
            **kwargs: `history` (list of `(human, ai)` pairs) is the only key read

        Yields:
            `Document(channel="chat")` chunks of the final answer, then a
            `content=None` Document followed by the whole final answer with
            citation links

        Returns:
            the final `Document` whose metadata holds "citation_viz", "mindmap",
            "citation" (list of `InlineEvidence`) and "qa_score"
        """
        history = kwargs.get("history", [])
        print(f"Got {len(images)} images")
        # check if evidence exists, use QA prompt
        if evidence:
            prompt, evidence = self.get_prompt(question, evidence, evidence_mode)
        else:
            prompt = question

        output = ""
        logprobs = []

        citation = None
        mindmap = None

        def mindmap_call():
            nonlocal mindmap
            mindmap = self.create_mindmap_pipeline(context=evidence, question=question)

        mindmap_thread = None

        # execute function call in thread
        if evidence:
            if self.enable_mindmap:
                mindmap_thread = threading.Thread(target=mindmap_call)
                mindmap_thread.start()

        messages = []
        if self.system_prompt:
            messages.append(SystemMessage(content=self.system_prompt))

        # replay only the most recent interactions
        for human, ai in history[-self.n_last_interactions :]:
            messages.append(HumanMessage(content=human))
            messages.append(AIMessage(content=ai))

        if self.use_multimodal and evidence_mode == EVIDENCE_MODE_FIGURE:
            # create image message:
            messages.append(
                HumanMessage(
                    content=[
                        {"type": "text", "text": prompt},
                    ]
                    + [
                        {
                            "type": "image_url",
                            "image_url": {"url": image},
                        }
                        for image in images[:MAX_IMAGES]
                    ],
                )
            )
        else:
            # append main prompt
            messages.append(HumanMessage(content=prompt))

        final_answer = ""

        try:
            # try streaming first
            print("Trying LLM streaming")
            for out_msg in self.llm.stream(messages):
                if evidence:
                    # nothing is shown until the "FINAL ANSWER" marker has been
                    # seen in the chunks received so far
                    if START_ANSWER in output:
                        if not final_answer:
                            # the chunk that carried the marker may also carry
                            # the beginning of the answer: prepend it
                            try:
                                left_over_answer = output.split(START_ANSWER)[
                                    1
                                ].lstrip()
                            except IndexError:
                                left_over_answer = ""
                            if left_over_answer:
                                out_msg.text = left_over_answer + out_msg.text

                        final_answer += (
                            out_msg.text.lstrip() if not final_answer else out_msg.text
                        )
                        yield Document(channel="chat", content=out_msg.text)

                        # check for the edge case of citation list is repeated
                        # with smaller LLMs
                        if START_CITATION in out_msg.text:
                            break
                else:
                    yield Document(channel="chat", content=out_msg.text)

                output += out_msg.text
                logprobs += out_msg.logprobs
        except NotImplementedError:
            print("Streaming is not supported, falling back to normal processing")
            output = self.llm(messages).text
            yield Document(channel="chat", content=output)

        # The loop above only emits chunks received *after* the marker, so the
        # answer is lost when the marker comes in the very last chunk, when the
        # LLM never emits it, or when the non-streaming fallback was used.
        # Recover it from the full output so the user always gets an answer.
        if evidence and not final_answer and output:
            _, marker, tail = output.partition(START_ANSWER)
            final_answer = tail.lstrip() if marker else output.strip()

        if logprobs:
            qa_score = np.exp(np.average(logprobs))
        else:
            qa_score = None

        citation = self.answer_to_citations(output)

        # wait (bounded) for the mindmap; it stays None if not finished in time
        if mindmap_thread:
            mindmap_thread.join(timeout=CITATION_TIMEOUT)

        # Without evidence there is no citation format to strip: the answer is
        # the raw output (it used to be returned as an empty text).
        answer = Document(
            text=final_answer if evidence else output,
            metadata={
                "citation_viz": self.enable_citation_viz,
                "mindmap": mindmap,
                "citation": citation,
                "qa_score": qa_score,
            },
        )

        # yield the final answer, with the citations converted to links
        final_answer = self.replace_citation_with_link(final_answer)

        if final_answer:
            # a Document with `content=None` is emitted before the full answer
            # (presumably to reset the chat output -- TODO confirm)
            yield Document(channel="chat", content=None)
            yield Document(channel="chat", content=final_answer)

        return answer

    def match_evidence_with_context(self, answer, docs) -> dict[str, list[dict]]:
        """Match the evidence with the context

        For every parsed evidence, the document with the longest match of its
        start / end phrases is selected.

        Args:
            answer: the answer Document; `answer.metadata["citation"]` is the
                list of `InlineEvidence` to locate
            docs: the documents to search in

        Returns:
            mapping from `doc_id` to a list of `{"start", "end", "idx"}` spans,
            where "idx" is the citation number (1-based position in the list
            when the LLM gave none)
        """
        spans: dict[str, list[dict]] = defaultdict(list)

        if not answer.metadata["citation"]:
            return spans

        evidences = answer.metadata["citation"]

        for e_id, evidence in enumerate(evidences):
            start_phrase, end_phrase = evidence.start_phrase, evidence.end_phrase
            evidence_idx = evidence.idx

            if evidence_idx is None:
                evidence_idx = e_id + 1

            best_match = None
            best_match_length = 0
            best_match_doc_idx = None

            for doc in docs:
                match, match_length = find_start_end_phrase(
                    start_phrase, end_phrase, doc.text
                )
                # keep the first match, then only strictly longer ones
                if match is not None and (
                    best_match is None or match_length > best_match_length
                ):
                    best_match = match
                    best_match_length = match_length
                    best_match_doc_idx = doc.doc_id

            if best_match is not None and best_match_doc_idx is not None:
                spans[best_match_doc_idx].append(
                    {
                        "start": best_match[0],
                        "end": best_match[1],
                        "idx": evidence_idx,
                    }
                )
        return spans


================================================
FILE: libs/kotaemon/kotaemon/indices/qa/format_context.py
================================================
import html
from functools import partial

import tiktoken

from kotaemon.base import BaseComponent, Document, RetrievedDocument
from kotaemon.indices.splitters import TokenSplitter

# Evidence modes returned as the first element of the content tuple produced
# by `PrepareEvidencePipeline.run`. The pipeline reports FIGURE if any image
# document was seen, else TABLE if any table document was seen, else TEXT;
# CHATBOT is recorded per document but is never selected as the final mode.
EVIDENCE_MODE_TEXT = 0
EVIDENCE_MODE_TABLE = 1
EVIDENCE_MODE_CHATBOT = 2
EVIDENCE_MODE_FIGURE = 3


class PrepareEvidencePipeline(BaseComponent):
    """Assemble the retrieved documents into a single evidence string

    This step usually happens after `DocumentRetrievalPipeline`.

    Args:
        max_context_length: chunk size given to the default `TokenSplitter`
            that is built when `trim_func` is not set.
        trim_func: a callback function or a BaseComponent, that splits a large
            chunk of text into smaller ones. The first one will be retained.
    """

    max_context_length: int = 32000
    trim_func: TokenSplitter | None = None

    def run(self, docs: list[RetrievedDocument]) -> Document:
        """Format `docs` as evidence and trim it to the first chunk

        Returns:
            Document whose content is the tuple
            ``(evidence_mode, evidence_text, image_origins)``.
        """
        evidence = ""
        images = []
        table_found = 0
        evidence_modes = []

        evidence_trim_func = self.trim_func
        if not evidence_trim_func:
            evidence_trim_func = TokenSplitter(
                chunk_size=self.max_context_length,
                chunk_overlap=0,
                separator=" ",
                tokenizer=partial(
                    tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
                    allowed_special=set(),
                    disallowed_special="all",
                ),
            )

        for item in docs:
            meta = item.metadata
            page = meta.get("page_label", None)
            filename = meta.get("file_name", "-")
            source = filename
            if page:
                source += f" (Page {page})"
            doc_type = meta.get("type", "")

            if doc_type == "table":
                evidence_modes.append(EVIDENCE_MODE_TABLE)
                # at most 5 distinct tables make it into the evidence
                if table_found >= 5:
                    continue
                content = meta.get("table_origin", item.text)
                if content in evidence:
                    continue
                table_found += 1
                evidence += "".join(
                    [f"<br><b>Table from {source}</b>\n", content, "\n<br>"]
                )
            elif doc_type == "chatbot":
                evidence_modes.append(EVIDENCE_MODE_CHATBOT)
                content = meta["window"]
                evidence += "".join(
                    [
                        f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n",
                        content,
                        "\n<br>",
                    ]
                )
            elif doc_type == "image":
                evidence_modes.append(EVIDENCE_MODE_FIGURE)
                image_origin = meta.get("image_origin", "")
                caption = html.escape(item.get_content())
                # `<src>` is emitted literally; the image data travels
                # separately in `images`
                evidence += "".join(
                    [
                        f"<br><b>Figure from {source}</b>\n",
                        "<img width='85%' src='<src>' ",
                        f"alt='{caption}'/>",
                        "\n<br>",
                    ]
                )
                images.append(image_origin)
            else:
                content = meta["window"] if "window" in meta else item.text
                content = content.replace("\n", " ")
                # skip text that already appears in the evidence
                if content not in evidence:
                    evidence += "".join(
                        [f"<br><b>Content from {source}: </b> ", content, " \n<br>"]
                    )

        # resolve evidence mode: figure wins over table, table over text
        if EVIDENCE_MODE_FIGURE in evidence_modes:
            evidence_mode = EVIDENCE_MODE_FIGURE
        elif EVIDENCE_MODE_TABLE in evidence_modes:
            evidence_mode = EVIDENCE_MODE_TABLE
        else:
            evidence_mode = EVIDENCE_MODE_TEXT

        # trim context by trim_len
        print("len (original)", len(evidence))
        if evidence:
            evidence = evidence_trim_func([Document(text=evidence)])[0].text
            print("len (trimmed)", len(evidence))

        return Document(content=(evidence_mode, evidence, images))


================================================
FILE: libs/kotaemon/kotaemon/indices/qa/utils.py
================================================
from difflib import SequenceMatcher


def find_text(search_span, context, min_length=5):
    """Find the region of `context` that best covers `search_span`

    Matching is case-insensitive and newlines in `context` are treated as
    spaces. `search_span` is processed line by line; the per-line spans are
    merged into one covering span.

    Args:
        search_span: text to look for.
        context: text to search in.
        min_length: spans (and the whole `search_span`) not longer than this
            are ignored.

    Returns:
        ``[(start, end)]`` with offsets into the normalized context, or an
        empty list when nothing matched well enough.
    """
    needle = search_span.lower()
    haystack = context.lower().replace("\n", " ")

    # don't search for small text
    if len(needle) <= min_length:
        return []

    line_spans = []
    for line in needle.split("\n"):
        blocks = SequenceMatcher(
            None, line, haystack, autojunk=False
        ).get_matching_blocks()

        # keep only blocks covering more than a quarter of the line
        block_floor = max(len(line) * 0.25, min_length)
        kept = [
            (block.b, block.b + block.size)
            for block in blocks
            if block.size > block_floor
        ]
        if not kept:
            continue

        lo = min(begin for begin, _ in kept)
        hi = max(finish for _, finish in kept)
        if hi - lo > max(len(line) * 0.35, min_length):
            line_spans.append((lo, hi))

    if not line_spans:
        return []

    # merge all matches into one span
    merged = (
        min(begin for begin, _ in line_spans),
        max(finish for _, finish in line_spans),
    )
    return [merged]


def find_start_end_phrase(
    start_phrase, end_phrase, context, min_length=5, max_excerpt_length=300
):
    """Locate the excerpt of `context` delimited by a start and an end phrase

    Matching is case-insensitive and newlines in `context` are treated as
    spaces. Either phrase may be ``None``, in which case it is skipped.

    Args:
        start_phrase: phrase expected at the beginning of the excerpt, or None.
        end_phrase: phrase expected at the end of the excerpt, or None.
        context: text to search in.
        min_length: a phrase match must be longer than this (and longer than
            35% of the phrase) to count.
        max_excerpt_length: the returned span is capped to this many characters.

    Returns:
        ``(span, matched_length)`` where ``span`` is ``(start, end)`` in the
        normalized context or ``None`` when no phrase matched, and
        ``matched_length`` is the summed size of the accepted phrase matches.
    """
    context = context.lower().replace("\n", " ")

    matches = []
    matched_length = 0
    for phrase in (start_phrase, end_phrase):
        # a missing phrase is skipped *before* any string method is called
        if phrase is None:
            continue

        phrase = phrase.lower()
        match = SequenceMatcher(
            None, phrase, context, autojunk=False
        ).find_longest_match()
        if match.size > max(len(phrase) * 0.35, min_length):
            matches.append((match.b, match.b + match.size))
            matched_length += match.size

    # check if second match is before the first match
    if len(matches) == 2 and matches[1][0] < matches[0][0]:
        # if so, keep only the first match
        matches = [matches[0]]

    if not matches:
        return None, matched_length

    start_idx = min(start for start, _ in matches)
    end_idx = max(end for _, end in matches)

    # check if the excerpt is too long
    if end_idx - start_idx > max_excerpt_length:
        end_idx = start_idx + max_excerpt_length

    return (start_idx, end_idx), matched_length


def replace_think_tag_with_details(text):
    """Turn `<think>...</think>` markers into a collapsible HTML `<details>` block"""
    opening = '<details><summary><span style="color:grey">Thought</span></summary><blockquote>'  # noqa
    closing = "</blockquote></details>"
    return text.replace("<think>", opening).replace("</think>", closing)


def strip_think_tag(text):
    """Return the part of `text` that follows the first `</think>` tag

    Text without a closing think tag is returned unchanged. Everything after
    the first `</think>` is kept, so an answer that itself contains the literal
    tag is not truncated.
    """
    _, closing_tag, remainder = text.partition("</think>")
    return remainder if closing_tag else text


================================================
FILE: libs/kotaemon/kotaemon/indices/rankings/__init__.py
================================================
from .base import BaseReranking
from .cohere import CohereReranking
from .llm import LLMReranking
from .llm_scoring import LLMScoring
from .llm_trulens import LLMTrulensScoring

__all__ = [
    "CohereReranking",
    "LLMReranking",
    "LLMScoring",
    "BaseReranking",
    "LLMTrulensScoring",
]


================================================
FILE: libs/kotaemon/kotaemon/indices/rankings/base.py
================================================
from __future__ import annotations

from abc import abstractmethod

from kotaemon.base import BaseComponent, Document


class BaseReranking(BaseComponent):
    """Interface implemented by every re-ranking component"""

    @abstractmethod
    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Main method to transform list of documents
        (re-ranking, filtering, etc)

        Args:
            documents: candidate documents.
            query: the query the documents are judged against.

        Returns:
            The transformed list of documents.
        """


================================================
FILE: libs/kotaemon/kotaemon/indices/rankings/cohere.py
================================================
from __future__ import annotations

from decouple import config

from kotaemon.base import Document

from .base import BaseReranking


class CohereReranking(BaseReranking):
    """Re-order documents with the Cohere rerank endpoint

    Attributes are declared below: the rerank model name, the API key (read
    from the `COHERE_API_KEY` setting by default) and a switch that allows
    borrowing the key from the `ktem` "cohere" embedding model.
    """

    model_name: str = "rerank-multilingual-v2.0"
    cohere_api_key: str = config("COHERE_API_KEY", "")
    use_key_from_ktem: bool = False

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Use Cohere Reranker model to re-order documents
        with their relevance score

        Each returned document gets `metadata["reranking_score"]`. When no API
        key is available the input documents are returned untouched.
        """
        try:
            import cohere
        except ImportError:
            raise ImportError(
                "Please install Cohere `pip install cohere` to use Cohere Reranking"
            )

        # try to get COHERE_API_KEY from embeddings
        if self.use_key_from_ktem and not self.cohere_api_key:
            try:
                from ktem.embeddings.manager import (
                    embedding_models_manager as embeddings,
                )

                borrowed_key = embeddings.get("cohere")._kwargs.get(  # type: ignore
                    "cohere_api_key"
                )
                # "your-key" is treated as a placeholder, not a real key
                if borrowed_key != "your-key":
                    self.cohere_api_key = borrowed_key
            except Exception as e:
                print("Cannot get Cohere API key from `ktem`", e)

        if not self.cohere_api_key:
            print("Cohere API key not found. Skipping rerankings.")
            return documents

        client = cohere.Client(self.cohere_api_key)
        reranked: list[Document] = []

        # to avoid empty api call
        if not documents:
            return reranked

        response = client.rerank(
            model=self.model_name,
            query=query,
            documents=[candidate.content for candidate in documents],
        )
        for hit in response.results:
            ranked_doc = documents[hit.index]
            ranked_doc.metadata["reranking_score"] = hit.relevance_score
            reranked.append(ranked_doc)

        return reranked


================================================
FILE: libs/kotaemon/kotaemon/indices/rankings/llm.py
================================================
from __future__ import annotations

from concurrent.futures import ThreadPoolExecutor

from langchain.output_parsers.boolean import BooleanOutputParser

from kotaemon.base import Document
from kotaemon.llms import BaseLLM, PromptTemplate

from .base import BaseReranking

# Default prompt asking the LLM for a YES/NO relevance verdict; it exposes the
# `{question}` and `{context}` placeholders.
RERANK_PROMPT_TEMPLATE = """Given the following question and context,
return YES if the context is relevant to the question and NO if it isn't.

> Question: {question}
> Context:
>>>
{context}
>>>
> Relevant (YES / NO):"""


class LLMReranking(BaseReranking):
    """Filter documents by asking an LLM whether each one is relevant

    The LLM is prompted once per document with `prompt_template` and its
    answer is parsed as a boolean. When no document is judged relevant, the
    first `top_k` input documents are returned instead.
    """

    llm: BaseLLM
    prompt_template: PromptTemplate = PromptTemplate(template=RERANK_PROMPT_TEMPLATE)
    top_k: int = 3
    concurrent: bool = True

    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Filter down documents based on their relevance to the query.

        Args:
            documents: candidate documents.
            query: the question the documents are judged against.

        Returns:
            The documents judged relevant, in input order; or the first
            `top_k` documents when none is judged relevant.
        """
        output_parser = BooleanOutputParser()

        prompts = [
            self.prompt_template.populate(question=query, context=doc.get_content())
            for doc in documents
        ]

        def ask(prompt):
            # the prompt is passed as an argument so each task keeps its own
            # prompt; a closure over the loop variable would be late-bound
            return self.llm(prompt).text

        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                futures = [executor.submit(ask, prompt) for prompt in prompts]
                results = [future.result() for future in futures]
        else:
            results = [ask(prompt) for prompt in prompts]

        # use Boolean parser to extract relevancy output from LLM
        verdicts = [output_parser.parse(result) for result in results]
        filtered_docs = [doc for doc, keep in zip(documents, verdicts) if keep]

        # prevent returning empty result
        if not filtered_docs:
            filtered_docs = documents[: self.top_k]

        return filtered_docs


================================================
FILE: libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
================================================
from __future__ import annotations

from concurrent.futures import ThreadPoolExecutor

import numpy as np
from langchain.output_parsers.boolean import BooleanOutputParser

from kotaemon.base import Document

from .llm import LLMReranking


class LLMScoring(LLMReranking):
    """Score every document with the confidence of the LLM's YES/NO verdict

    Unlike `LLMReranking`, no document is dropped: each one is returned with
    `metadata["llm_reranking_score"]` set.
    """

    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Attach an LLM relevance score to each document.

        The score is ``exp(mean(logprobs))`` of the LLM response when the
        verdict parses as relevant, and ``1 - exp(mean(logprobs))`` otherwise.

        Args:
            documents: candidate documents.
            query: the question the documents are judged against.

        Returns:
            All input documents, in input order, with the score attached.
        """
        output_parser = BooleanOutputParser()

        prompts = [
            self.prompt_template.populate(question=query, context=doc.get_content())
            for doc in documents
        ]

        def ask(prompt):
            # the prompt is passed as an argument so each task keeps its own
            # prompt; a closure over the loop variable would be late-bound
            return self.llm(prompt)

        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                futures = [executor.submit(ask, prompt) for prompt in prompts]
                results = [future.result() for future in futures]
        else:
            results = [ask(prompt) for prompt in prompts]

        scored_docs: list[Document] = []
        for result, doc in zip(results, documents):
            score = np.exp(np.average(result.logprobs))
            include_doc = output_parser.parse(result.text)
            doc.metadata["llm_reranking_score"] = score if include_doc else 1 - score
            scored_docs.append(doc)

        # every document is kept, so the result is empty only for empty input
        return scored_docs


================================================
FILE: libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
================================================
from __future__ import annotations

import re
from concurrent.futures import ThreadPoolExecutor
from functools import partial

import tiktoken

from kotaemon.base import Document, HumanMessage, SystemMessage
from kotaemon.indices.splitters import TokenSplitter
from kotaemon.llms import BaseLLM, PromptTemplate

from .llm import LLMReranking

# System prompt instructing the LLM to answer with a bare 0-10 relevance grade.
SYSTEM_PROMPT_TEMPLATE = PromptTemplate(
    """You are a RELEVANCE grader; providing the relevance of the given CONTEXT to the given QUESTION.
        Respond only as a number from 0 to 10 where 0 is the least relevant and 10 is the most relevant.

        A few additional scoring guidelines:

        - Long CONTEXTS should score equally well as short CONTEXTS.

        - RELEVANCE score should increase as the CONTEXTS provides more RELEVANT context to the QUESTION.

        - RELEVANCE score should increase as the CONTEXTS provides RELEVANT context to more parts of the QUESTION.

        - CONTEXT that is RELEVANT to some of the QUESTION should score of 2, 3 or 4. Higher score indicates more RELEVANCE.

        - CONTEXT that is RELEVANT to most of the QUESTION should get a score of 5, 6, 7 or 8. Higher score indicates more RELEVANCE.

        - CONTEXT that is RELEVANT to the entire QUESTION should get a score of 9 or 10. Higher score indicates more RELEVANCE.

        - CONTEXT must be relevant and helpful for answering the entire QUESTION to get a score of 10.

        - Never elaborate."""  # noqa: E501
)

# User prompt carrying the `{question}` and `{context}` placeholders; it ends
# with "RELEVANCE: " so the model's reply is the grade itself.
USER_PROMPT_TEMPLATE = PromptTemplate(
    """QUESTION: {question}

        CONTEXT: {context}

        RELEVANCE: """
)  # noqa

# Matches an optionally signed integer without leading zeros, or a lone "0".
PATTERN_INTEGER: re.Pattern = re.compile(r"([+-]?[1-9][0-9]*|0)")
"""Regex that matches integers."""

# Chunk size (in tokenizer tokens) used by the default `trim_func` of
# `LLMTrulensScoring` to cut long document contents.
MAX_CONTEXT_LEN = 7500


def validate_rating(rating) -> int:
    """Return `rating` unchanged when it lies in the inclusive range 0-10.

    Raises:
        ValueError: if `rating` is outside that range.
    """
    if 0 <= rating <= 10:
        return rating

    raise ValueError("Rating must be between 0 and 10")


def re_0_10_rating(s: str) -> int:
    """Extract a 0-10 rating from a string.

    If the string does not match an integer or matches an integer outside the
    0-10 range, raises an error instead. If multiple numbers are found within
    the expected 0-10 range, the smallest is returned.

    Args:
        s: String to extract rating from.

    Returns:
        int: Extracted rating.

    Raises:
        AssertionError: If no integers between 0 and 10 are found in the string.
    """

    matches = PATTERN_INTEGER.findall(s)
    if not matches:
        raise AssertionError(f"No integer found in rating response: {s!r}")

    vals = set()
    for match in matches:
        try:
            vals.add(validate_rating(int(match)))
        except ValueError:
            # out-of-range numbers are ignored on purpose; other numbers in
            # the same response may still be valid ratings
            continue

    if not vals:
        raise AssertionError(f"No rating between 0 and 10 found in response: {s!r}")

    # Min to handle cases like "The rating is 8 out of 10."
    return min(vals)


class LLMTrulensScoring(LLMReranking):
    """Rank documents by a 0-10 relevance grade given by an LLM

    Each document is graded with a system + user prompt pair, the reply is
    parsed into an integer grade and divided by `normalize`. Documents are
    returned sorted by that score (highest first) with
    `metadata["llm_trulens_score"]` set.
    """

    llm: BaseLLM
    system_prompt_template: PromptTemplate = SYSTEM_PROMPT_TEMPLATE
    user_prompt_template: PromptTemplate = USER_PROMPT_TEMPLATE
    concurrent: bool = True
    normalize: float = 10
    trim_func: TokenSplitter = TokenSplitter.withx(
        chunk_size=MAX_CONTEXT_LEN,
        chunk_overlap=0,
        separator=" ",
        tokenizer=partial(
            tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
            allowed_special=set(),
            disallowed_special="all",
        ),
    )

    def run(
        self,
        documents: list[Document],
        query: str,
    ) -> list[Document]:
        """Score documents by their relevance to the query and sort them.

        Args:
            documents: candidate documents.
            query: the question the documents are graded against.

        Returns:
            All documents, ordered by descending normalized grade.

        Raises:
            AssertionError: propagated from `re_0_10_rating` when an LLM reply
                contains no grade between 0 and 10.
        """

        def build_messages(doc):
            # only the content is trimmed; metadata is skipped because it
            # causes troubles in the splitter
            chunks = self.trim_func([Document(content=doc.get_content())])
            # fall back to the raw content if the splitter yields no chunk
            context = chunks[0].text if chunks else doc.get_content()
            return [
                SystemMessage(self.system_prompt_template.populate()),
                HumanMessage(
                    self.user_prompt_template.populate(
                        question=query, context=context
                    )
                ),
            ]

        def grade(messages):
            # messages are passed as an argument so each task keeps its own
            # prompt; a closure over the loop variable would be late-bound
            return self.llm(messages).text

        documents = sorted(documents, key=lambda doc: doc.get_content())
        prompts = [build_messages(doc) for doc in documents]

        if self.concurrent:
            with ThreadPoolExecutor() as executor:
                futures = [executor.submit(grade, messages) for messages in prompts]
                results = [future.result() for future in futures]
        else:
            results = [grade(messages) for messages in prompts]

        # parse the 0-10 grade from each LLM reply and normalize it
        scores = [
            (r_idx, float(re_0_10_rating(result)) / self.normalize)
            for r_idx, result in enumerate(results)
        ]
        scores.sort(key=lambda x: x[1], reverse=True)

        filtered_docs = []
        for r_idx, score in scores:
            doc = documents[r_idx]
            doc.metadata["llm_trulens_score"] = score
            filtered_docs.append(doc)

        print(
            "LLM rerank scores",
            [doc.metadata["llm_trulens_score"] for doc in filtered_docs],
        )

        return filtered_docs


================================================
FILE: libs/kotaemon/kotaemon/indices/retrievers/__init__.py
================================================


================================================
FILE: libs/kotaemon/kotaemon/indices/retrievers/jina_web_search.py
================================================
import requests
from decouple import config

from kotaemon.base import BaseComponent, RetrievedDocument

# Settings read through `decouple.config`; both fall back to the defaults
# below when unset. `WebSearch.run` refuses to run while JINA_API_KEY is empty.
JINA_API_KEY = config("JINA_API_KEY", default="")
JINA_URL = config("JINA_URL", default="https://r.jina.ai/")


class WebSearch(BaseComponent):
    """WebSearch component for fetching data from the web
    using Jina API
    """

    def run(
        self,
        text: str,
        *args,
        **kwargs,
    ) -> list[RetrievedDocument]:
        """Search the web for `text` and return one document per result

        Args:
            text: the search query.

        Returns:
            One `RetrievedDocument` per item of the response's `data` list,
            formatted as markdown (URL, title, description, content).

        Raises:
            ValueError: if `JINA_API_KEY` is not configured.
            requests.HTTPError: if the search endpoint returns an error status.
        """
        from urllib.parse import quote

        if JINA_API_KEY == "":
            raise ValueError(
                "This feature requires JINA_API_KEY "
                "(get free one from https://jina.ai/reader)"
            )

        # setup the request; the query lives in the URL path, so it must be
        # percent-encoded or characters such as `?` and `#` would cut it short
        api_url = f"https://s.jina.ai/{quote(text)}"
        headers = {
            "X-With-Generated-Alt": "true",
            "Accept": "application/json",
            "Authorization": f"Bearer {JINA_API_KEY}",
        }

        response = requests.get(api_url, headers=headers)
        response.raise_for_status()
        response_dict = response.json()

        return [
            RetrievedDocument(
                text=(
                    "###URL: [{url}]({url})\n\n"
                    "####{title}\n\n"
                    "{description}\n"
                    "{content}"
                ).format(
                    url=item["url"],
                    title=item["title"],
                    description=item["description"],
                    content=item["content"],
                ),
                metadata={
                    "file_name": "Web search",
                    "type": "table",
                    "llm_trulens_score": 1.0,
                },
            )
            for item in response_dict["data"]
        ]

    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):
        """Return `documents` unchanged; `run` already sets a fixed score."""
        return documents


================================================
FILE: libs/kotaemon/kotaemon/indices/retrievers/tavily_web_search.py
================================================
from decouple import config

from kotaemon.base import BaseComponent, RetrievedDocument

# Read through `decouple.config`; `WebSearch.run` refuses to run while empty.
TAVILY_API_KEY = config("TAVILY_API_KEY", default="")


class WebSearch(BaseComponent):
    """WebSearch component for fetching data from the web
    using Tavily API
    """

    def run(
        self,
        text: str,
        *args,
        **kwargs,
    ) -> list[RetrievedDocument]:
        """Search the web for `text` and return the results as one document

        Args:
            text: the search query.

        Returns:
            A single-element list whose document text joins every result as
            a markdown URL heading followed by the result content.

        Raises:
            ValueError: if `TAVILY_API_KEY` is not configured.
            ImportError: if the `tavily` package is not installed.
        """
        if TAVILY_API_KEY == "":
            raise ValueError(
                "This feature requires TAVILY_API_KEY "
                "(get free one from https://app.tavily.com/)"
            )

        try:
            from tavily import TavilyClient
        except ImportError as err:
            raise ImportError(
                "Please install `pip install tavily-python` to use this feature"
            ) from err

        tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
        results = tavily_client.search(
            query=text,
            search_depth="advanced",
        )["results"]
        context = "\n\n".join(
            "###URL: [{url}]({url})\n\n{content}".format(
                url=result["url"],
                content=result["content"],
            )
            for result in results
        )

        return [
            RetrievedDocument(
                text=context,
                metadata={
                    "file_name": "Web search",
                    "type": "table",
                    "llm_trulens_score": 1.0,
                },
            )
        ]

    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):
        """Return `documents` unchanged; `run` already sets a fixed score."""
        return documents


================================================
FILE: libs/kotaemon/kotaemon/indices/splitters/__init__.py
================================================
from ..base import DocTransformer, LlamaIndexDocTransformerMixin


class BaseSplitter(DocTransformer):
    """Represent base splitter class

    Adds nothing to `DocTransformer`; it only gives splitters a common parent.
    """


class TokenSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
    """Splitter backed by llama-index's `TokenTextSplitter`"""

    def __init__(
        self,
        chunk_size: int = 1024,
        chunk_overlap: int = 20,
        separator: str = " ",
        **params,
    ):
        """Forward the splitter options, plus any extra `params`, to the mixin"""
        splitter_options = {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
            "separator": separator,
        }
        super().__init__(**splitter_options, **params)

    def _get_li_class(self):
        """Return the llama-index class this splitter wraps (imported lazily)"""
        from llama_index.core.text_splitter import TokenTextSplitter as li_class

        return li_class


class SentenceWindowSplitter(LlamaIndexDocTransformerMixin, BaseSplitter):
    """Splitter backed by llama-index's `SentenceWindowNodeParser`"""

    def __init__(
        self,
        window_size: int = 3,
        window_metadata_key: str = "window",
        original_text_metadata_key: str = "original_text",
        **params,
    ):
        """Forward the window options, plus any extra `params`, to the mixin"""
        window_options = {
            "window_size": window_size,
            "window_metadata_key": window_metadata_key,
            "original_text_metadata_key": original_text_metadata_key,
        }
        super().__init__(**window_options, **params)

    def _get_li_class(self):
        """Return the llama-index class this splitter wraps (imported lazily)"""
        from llama_index.core.node_parser import SentenceWindowNodeParser as li_class

        return li_class


================================================
FILE: libs/kotaemon/kotaemon/indices/vectorindex.py
================================================
from __future__ import annotations

import threading
import uuid
from pathlib import Path
from typing import Optional, Sequence, cast

from theflow.settings import settings as flowsettings

from kotaemon.base import BaseComponent, Document, RetrievedDocument
from kotaemon.embeddings import BaseEmbeddings
from kotaemon.storages import BaseDocumentStore, BaseVectorStore

from .base import BaseIndexing, BaseRetrieval
from .rankings import BaseReranking, LLMReranking

VECTOR_STORE_FNAME = "vectorstore"
DOC_STORE_FNAME = "docstore"


class VectorIndexing(BaseIndexing):
    """Ingest the document, run through the embedding, and store the embedding in a
    vector store.

    This pipeline supports the following set of inputs:
        - List of documents
        - List of texts
    """

    # directory where chunk contents are dumped as markdown; disabled when None
    cache_dir: Optional[str] = getattr(flowsettings, "KH_CHUNKS_OUTPUT_DIR", None)
    vector_store: BaseVectorStore
    doc_store: Optional[BaseDocumentStore] = None
    embedding: BaseEmbeddings
    # number of documents processed so far; used to number the dumped files
    count_: int = 0

    def to_retrieval_pipeline(self, *args, **kwargs):
        """Convert the indexing pipeline to a retrieval pipeline

        The stores and the embedding are shared with this pipeline; `kwargs`
        are forwarded to `VectorRetrieval` (positional `args` are ignored).
        """
        return VectorRetrieval(
            vector_store=self.vector_store,
            doc_store=self.doc_store,
            embedding=self.embedding,
            **kwargs,
        )

    def write_chunk_to_file(self, docs: list[Document]):
        """Save the chunks content into markdown files under `cache_dir`

        Nothing is written when `cache_dir` is unset, when `docs` is empty or
        when the first document has no `file_name` metadata. Files are named
        `<file stem>_<running index>.md`.
        """
        # an empty batch has no first document to take the file name from
        if not self.cache_dir or not docs:
            return

        file_name = docs[0].metadata.get("file_name")
        if not file_name:
            return

        stem = Path(file_name).stem
        for offset, doc in enumerate(docs):
            metadata = doc.metadata
            markdown_content = ""
            if "page_label" in metadata:
                page_label = str(metadata["page_label"])
                markdown_content += f"Page label: {page_label}"
            if "file_name" in metadata:
                filename = metadata["file_name"]
                markdown_content += f"\nFile name: {filename}"
            if "section" in metadata:
                section = metadata["section"]
                markdown_content += f"\nSection: {section}"
            if metadata.get("type") == "image":
                image_origin = metadata["image_origin"]
                image_origin = f'<p><img src="{image_origin}"></p>'
                markdown_content += f"\nImage origin: {image_origin}"
            if doc.text:
                markdown_content += f"\ntext:\n{doc.text}"

            output_path = Path(self.cache_dir) / f"{stem}_{self.count_+offset}.md"
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(markdown_content)

    def add_to_docstore(self, docs: list[Document]):
        """Add `docs` to the document store, if one is configured"""
        if self.doc_store:
            print("Adding documents to doc store")
            self.doc_store.add(docs)

    def add_to_vectorstore(self, docs: list[Document]):
        """Embed `docs` and add the embeddings, keyed by `doc_id`, to the vector store"""
        # in case we want to skip embedding
        if self.vector_store:
            print(f"Getting embeddings for {len(docs)} nodes")
            embeddings = self.embedding(docs)
            print("Adding embeddings to vector store")
            self.vector_store.add(
                embeddings=embeddings,
                ids=[t.doc_id for t in docs],
            )

    def run(self, text: str | list[str] | Document | list[Document]):
        """Index the given text(s) or document(s)

        Strings are wrapped into `Document`s with a fresh UUID. The batch is
        then embedded into the vector store, added to the doc store and
        optionally dumped to `cache_dir`.

        Raises:
            ValueError: if an item is neither a `str` nor a `Document`.
        """
        input_: list[Document] = []
        if not isinstance(text, list):
            text = [text]

        for item in cast(list, text):
            if isinstance(item, str):
                input_.append(Document(text=item, id_=str(uuid.uuid4())))
            elif isinstance(item, Document):
                input_.append(item)
            else:
                raise ValueError(
                    f"Invalid input type {type(item)}, should be str or Document"
                )

        self.add_to_vectorstore(input_)
        self.add_to_docstore(input_)
        self.write_chunk_to_file(input_)
        self.count_ += len(input_)


class VectorRetrieval(BaseRetrieval):
    """Retrieve list of documents from vector store

    ``retrieval_mode`` selects where the candidates come from:

    - ``"vector"``: similarity search in ``vector_store``; the documents are
      then loaded from ``doc_store`` by id.
    - ``"text"``: ``doc_store.query`` only (executed only when a ``scope`` is
      given).
    - ``"hybrid"``: both of the above, run in two threads and merged.

    The candidates then go through ``rerankers`` (if any), are cut to ``top_k``
    and are finally post-processed so that text chunks linked to a thumbnail
    document are returned as image documents.
    """

    vector_store: BaseVectorStore
    doc_store: Optional[BaseDocumentStore] = None
    embedding: BaseEmbeddings
    rerankers: Sequence[BaseReranking] = []
    top_k: int = 5
    first_round_top_k_mult: int = 10
    retrieval_mode: str = "hybrid"  # vector, text, hybrid

    def _filter_docs(
        self, documents: list[RetrievedDocument], top_k: int | None = None
    ):
        """Return the first ``top_k`` documents; a falsy ``top_k`` keeps all."""
        if top_k:
            documents = documents[:top_k]
        return documents

    def run(
        self, text: str | Document, top_k: Optional[int] = None, **kwargs
    ) -> list[RetrievedDocument]:
        """Retrieve a list of documents from vector store

        Args:
            text: the text to retrieve similar documents
            top_k: number of top similar documents to return. ``self.top_k`` is
                used when None.
            **kwargs: the following keys are consumed here, everything else is
                forwarded to ``vector_store.query``:

                - ``do_extend`` (default False): fetch
                  ``top_k * first_round_top_k_mult`` candidates in the first
                  round instead of ``top_k``.
                - ``thumbnail_count`` (default 3): maximum number of distinct
                  linked thumbnails to resolve.
                - ``scope`` (default None): passed as ``doc_ids`` to the stores.

        Returns:
            list[RetrievedDocument]: list of retrieved documents

        Raises:
            ValueError: if ``doc_store`` is not provided.
        """
        if top_k is None:
            top_k = self.top_k

        do_extend = kwargs.pop("do_extend", False)
        thumbnail_count = kwargs.pop("thumbnail_count", 3)

        if do_extend:
            top_k_first_round = top_k * self.first_round_top_k_mult
        else:
            top_k_first_round = top_k

        if self.doc_store is None:
            raise ValueError(
                "doc_store is not provided. Please provide a doc_store to "
                "retrieve the documents"
            )

        result: list[RetrievedDocument] = []
        # TODO: should declare scope directly in the run params
        scope = kwargs.pop("scope", None)
        emb: list[float]

        if self.retrieval_mode == "vector":
            emb = self.embedding(text)[0].embedding
            _, scores, ids = self.vector_store.query(
                embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
            )
            docs = self.doc_store.get(ids)
            result = [
                RetrievedDocument(**doc.to_dict(), score=score)
                for doc, score in zip(docs, scores)
            ]
        elif self.retrieval_mode == "text":
            query = text.text if isinstance(text, Document) else text
            docs = []
            # the full-text query is only issued for an explicit scope
            if scope:
                docs = self.doc_store.query(
                    query, top_k=top_k_first_round, doc_ids=scope
                )
            # text search gives no similarity score, -1.0 marks that
            result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs]
        elif self.retrieval_mode == "hybrid":
            # similarity search section
            emb = self.embedding(text)[0].embedding
            vs_docs: list[RetrievedDocument] = []
            vs_ids: list[str] = []
            vs_scores: list[float] = []

            def query_vectorstore():
                nonlocal vs_docs
                nonlocal vs_scores
                nonlocal vs_ids

                assert self.doc_store is not None
                _, vs_scores, vs_ids = self.vector_store.query(
                    embedding=emb, top_k=top_k_first_round, doc_ids=scope, **kwargs
                )
                if vs_ids:
                    vs_docs = self.doc_store.get(vs_ids)

            # full-text search section
            ds_docs: list[RetrievedDocument] = []

            def query_docstore():
                nonlocal ds_docs

                assert self.doc_store is not None
                query = text.text if isinstance(text, Document) else text
                if scope:
                    ds_docs = self.doc_store.query(
                        query, top_k=top_k_first_round, doc_ids=scope
                    )

            # run both searches concurrently and wait for both to finish
            vs_query_thread = threading.Thread(target=query_vectorstore)
            ds_query_thread = threading.Thread(target=query_docstore)

            vs_query_thread.start()
            ds_query_thread.start()

            vs_query_thread.join()
            ds_query_thread.join()

            # A chunk found by both searches must appear once, with its vector
            # score. Compare by document id: testing the document object itself
            # against the list of id strings never matches.
            vs_id_set = set(vs_ids)
            result = [
                RetrievedDocument(**doc.to_dict(), score=-1.0)
                for doc in ds_docs
                if doc.doc_id not in vs_id_set
            ]
            result += [
                RetrievedDocument(**doc.to_dict(), score=score)
                for doc, score in zip(vs_docs, vs_scores)
            ]
            print(f"Got {len(vs_docs)} from vectorstore")
            print(f"Got {len(ds_docs)} from docstore")

        # use additional reranker to re-order the document list
        if self.rerankers and text:
            for reranker in self.rerankers:
                # if reranker is LLMReranking, limit the document with top_k items only
                if isinstance(reranker, LLMReranking):
                    result = self._filter_docs(result, top_k=top_k)
                result = reranker.run(documents=result, query=text)

        result = self._filter_docs(result, top_k=top_k)
        print(f"Got raw {len(result)} retrieved documents")

        # add page thumbnails to the result if exists
        thumbnail_doc_ids: set[str] = set()
        # we should copy the text from retrieved text chunk
        # to the thumbnail to get relevant LLM score correctly
        text_thumbnail_docs: dict[str, RetrievedDocument] = {}

        non_thumbnail_docs = []
        raw_thumbnail_docs = []
        for doc in result:
            if doc.metadata.get("type") == "thumbnail":
                # change type to image to display on UI
                doc.metadata["type"] = "image"
                raw_thumbnail_docs.append(doc)
                continue
            if (
                "thumbnail_doc_id" in doc.metadata
                and len(thumbnail_doc_ids) < thumbnail_count
            ):
                # remember which text chunk points to this thumbnail; the chunk
                # itself is replaced by the thumbnail document below
                thumbnail_id = doc.metadata["thumbnail_doc_id"]
                thumbnail_doc_ids.add(thumbnail_id)
                text_thumbnail_docs[thumbnail_id] = doc
            else:
                non_thumbnail_docs.append(doc)

        linked_thumbnail_docs = self.doc_store.get(list(thumbnail_doc_ids))
        print(
            "thumbnail docs",
            len(linked_thumbnail_docs),
            "non-thumbnail docs",
            len(non_thumbnail_docs),
            "raw-thumbnail docs",
            len(raw_thumbnail_docs),
        )
        additional_docs = []

        for thumbnail_doc in linked_thumbnail_docs:
            text_doc = text_thumbnail_docs[thumbnail_doc.doc_id]
            # start from the thumbnail, then take id, content and score from
            # the text chunk that referenced it
            doc_dict = thumbnail_doc.to_dict()
            doc_dict["_id"] = text_doc.doc_id
            doc_dict["content"] = text_doc.content
            doc_dict["metadata"]["type"] = "image"
            # thumbnail metadata wins; only missing keys come from the text chunk
            for key in text_doc.metadata:
                if key not in doc_dict["metadata"]:
                    doc_dict["metadata"][key] = text_doc.metadata[key]

            additional_docs.append(RetrievedDocument(**doc_dict, score=text_doc.score))

        result = additional_docs + non_thumbnail_docs

        if not result:
            # return output from raw retrieved thumbnails
            result = self._filter_docs(raw_thumbnail_docs, top_k=thumbnail_count)

        return result


class TextVectorQA(BaseComponent):
    """Answer a question in two steps: retrieve documents, then run QA on them."""

    retrieving_pipeline: BaseRetrieval
    qa_pipeline: BaseComponent

    def run(self, question, **kwargs):
        """Retrieve documents for ``question`` and pass both to the QA pipeline.

        Args:
            question: forwarded to both pipelines as first positional argument.
            **kwargs: forwarded unchanged to both pipelines.

        Returns:
            Whatever ``qa_pipeline`` returns.
        """
        context_docs = self.retrieving_pipeline(question, **kwargs)
        answer = self.qa_pipeline(question, context_docs, **kwargs)
        return answer


================================================
FILE: libs/kotaemon/kotaemon/llms/__init__.py
================================================
from kotaemon.base.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage

from .base import BaseLLM
from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
from .chats import (
    AzureChatOpenAI,
    ChatLLM,
    ChatOpenAI,
    EndpointChatLLM,
    LCAnthropicChat,
    LCAzureChatOpenAI,
    LCChatOpenAI,
    LCCohereChat,
    LCGeminiChat,
    LCOllamaChat,
    LlamaCppChat,
    StructuredOutputChatOpenAI,
)
from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
from .cot import ManualSequentialChainOfThought, Thought
from .linear import GatedLinearPipeline, SimpleLinearPipeline
from .prompts import BasePromptComponent, PromptTemplate

# Names re-exported by this package; each one is imported at the top of this
# module and grouped below by the kind of component it is.
__all__ = [
    "BaseLLM",
    # chat-specific components
    "ChatLLM",
    "EndpointChatLLM",
    "BaseMessage",
    "HumanMessage",
    "AIMessage",
    "SystemMessage",
    "AzureChatOpenAI",
    "ChatOpenAI",
    "StructuredOutputChatOpenAI",
    "LCAnthropicChat",
    "LCGeminiChat",
    "LCCohereChat",
    "LCOllamaChat",
    "LCAzureChatOpenAI",
    "LCChatOpenAI",
    "LlamaCppChat",
    # completion-specific components
    "LLM",
    "OpenAI",
    "AzureOpenAI",
    "LlamaCpp",
    # prompt-specific components
    "BasePromptComponent",
    "PromptTemplate",
    # strategies
    "SimpleLinearPipeline",
    "GatedLinearPipeline",
    "SimpleBranchingPipeline",
    "GatedBranchingPipeline",
    # chain-of-thoughts
    "ManualSequentialChainOfThought",
    "Thought",
]


================================================
FILE: libs/kotaemon/kotaemon/llms/base.py
================================================
from typing import AsyncGenerator, Iterator

from langchain_core.language_models.base import BaseLanguageModel

from kotaemon.base import BaseComponent, LLMInterface


class BaseLLM(BaseComponent):
    """Common interface of the LLM components.

    Every method except ``run`` raises ``NotImplementedError`` here and is meant
    to be provided by subclasses; ``run`` simply forwards to ``invoke``.
    """

    def to_langchain_format(self) -> BaseLanguageModel:
        """Return the equivalent Langchain language model."""
        raise NotImplementedError()

    def invoke(self, *args, **kwargs) -> LLMInterface:
        """Generate a response synchronously."""
        raise NotImplementedError()

    async def ainvoke(self, *args, **kwargs) -> LLMInterface:
        """Generate a response asynchronously."""
        raise NotImplementedError()

    def stream(self, *args, **kwargs) -> Iterator[LLMInterface]:
        """Generate a response as an iterator of partial outputs."""
        raise NotImplementedError()

    def astream(self, *args, **kwargs) -> AsyncGenerator[LLMInterface, None]:
        """Generate a response as an async generator of partial outputs."""
        raise NotImplementedError()

    def run(self, *args, **kwargs):
        """Call ``invoke`` with the same arguments and return its result."""
        response = self.invoke(*args, **kwargs)
        return response


================================================
FILE: libs/kotaemon/kotaemon/llms/branching.py
================================================
from typing import List, Optional

from kotaemon.base import BaseComponent, Document, Param

from .linear import GatedLinearPipeline


class SimpleBranchingPipeline(BaseComponent):
    """
    Pipeline that runs every registered branch with the same keyword arguments.

    Attributes:
        branches (List[BaseComponent]): The branches, executed in insertion order.

    Example:
        ```python
        from kotaemon.llms import (
            LCAzureChatOpenAI,
            BasePromptComponent,
            GatedLinearPipeline,
            SimpleBranchingPipeline,
        )
        from kotaemon.parsers import RegexExtractor

        def identity(x):
            return x

        pipeline = SimpleBranchingPipeline()
        llm = LCAzureChatOpenAI(
            azure_endpoint="your azure endpoint",
            openai_api_key="your openai api key",
            openai_api_version="your openai api version",
            deployment_name="dummy-q2-gpt35",
            temperature=0,
            request_timeout=600,
        )

        for i in range(3):
            pipeline.add_branch(
                GatedLinearPipeline(
                    prompt=BasePromptComponent(template=f"what is {i} in Japanese ?"),
                    condition=RegexExtractor(pattern=f"{i}"),
                    llm=llm,
                    post_processor=identity,
                )
            )
        print(pipeline(condition_text="1"))
        print(pipeline(condition_text="2"))
        print(pipeline(condition_text="12"))
        ```
    """

    branches: List[BaseComponent] = Param(default_callback=lambda *_: [])

    def add_branch(self, component: BaseComponent):
        """
        Append a branch at the end of the pipeline.

        Args:
            component (BaseComponent): The branch to register.
        """
        self.branches.append(component)

    def run(self, **prompt_kwargs):
        """
        Run all branches in order and collect their outputs.

        Each branch is registered as child ``branch-<index>`` right before it
        is called.

        Args:
            **prompt_kwargs: Keyword arguments passed to every branch.

        Returns:
            List: One output per branch, in branch order.
        """

        def execute(position, child):
            self._prepare_child(child, name=f"branch-{position}")
            return child(**prompt_kwargs)

        return [execute(position, child) for position, child in enumerate(self.branches)]


class GatedBranchingPipeline(SimpleBranchingPipeline):
    """
    Branching pipeline that stops at the first branch producing an output.

    Branches are tried in order with the same ``condition_text``; the first
    truthy output is returned and the remaining branches are not executed.

    Attributes:
        branches (List[BaseComponent]): The branches, tried in insertion order.

    Example:
        ```python
        from kotaemon.llms import (
            LCAzureChatOpenAI,
            BasePromptComponent,
            GatedBranchingPipeline,
            GatedLinearPipeline,
        )
        from kotaemon.parsers import RegexExtractor

        def identity(x):
            return x

        pipeline = GatedBranchingPipeline()
        llm = LCAzureChatOpenAI(
            azure_endpoint="your azure endpoint",
            openai_api_key="your openai api key",
            openai_api_version="your openai api version",
            deployment_name="dummy-q2-gpt35",
            temperature=0,
            request_timeout=600,
        )

        for i in range(3):
            pipeline.add_branch(
                GatedLinearPipeline(
                    prompt=BasePromptComponent(template=f"what is {i} in Japanese ?"),
                    condition=RegexExtractor(pattern=f"{i}"),
                    llm=llm,
                    post_processor=identity,
                )
            )
        print(pipeline(condition_text="1"))
        print(pipeline(condition_text="2"))
        ```
    """

    def run(self, *, condition_text: Optional[str] = None, **prompt_kwargs):
        """
        Try the branches in order and return the first truthy output.

        Args:
            condition_text (str): The condition text given to every branch.
                Although it defaults to None, a value is mandatory.
            **prompt_kwargs: Further keyword arguments passed to every branch.

        Returns:
            The output of the first branch whose output is truthy, or
            ``Document(None)`` when no branch produces one.

        Raises:
            ValueError: If condition_text is None
        """
        if condition_text is None:
            raise ValueError("`condition_text` must be provided.")

        for position, child in enumerate(self.branches):
            self._prepare_child(child, name=f"branch-{position}")
            outcome = child(condition_text=condition_text, **prompt_kwargs)
            if not outcome:
                # this branch did not fire, try the next one
                continue
            return outcome

        return Document(None)


if __name__ == "__main__":
    # Manual demo: build a gated branching pipeline with three gated branches
    # on an Azure OpenAI chat model configured from a local ``.env`` file, then
    # run it once.
    import dotenv

    from kotaemon.llms import BasePromptComponent, LCAzureChatOpenAI
    from kotaemon.parsers import RegexExtractor

    def identity(x):
        return x

    env_values = dotenv.dotenv_values(".env")

    demo_pipeline = GatedBranchingPipeline()
    azure_llm = LCAzureChatOpenAI(
        openai_api_base=env_values.get("OPENAI_API_BASE", ""),
        openai_api_key=env_values.get("OPENAI_API_KEY", ""),
        openai_api_version=env_values.get("OPENAI_API_VERSION", ""),
        deployment_name="dummy-q2-gpt35",
        temperature=0,
        request_timeout=600,
    )

    for digit in range(3):
        gated_branch = GatedLinearPipeline(
            prompt=BasePromptComponent(template=f"what is {digit} in Japanese ?"),
            condition=RegexExtractor(pattern=f"{digit}"),
            llm=azure_llm,
            post_processor=identity,
        )
        demo_pipeline.add_branch(gated_branch)
    demo_pipeline(condition_text="1")


================================================
FILE: libs/kotaemon/kotaemon/llms/chats/__init__.py
================================================
from .base import ChatLLM
from .endpoint_based import EndpointChatLLM
from .langchain_based import (
    LCAnthropicChat,
    LCAzureChatOpenAI,
    LCChatMixin,
    LCChatOpenAI,
    LCCohereChat,
    LCGeminiChat,
    LCOllamaChat,
)
from .llamacpp import LlamaCppChat
from .openai import AzureChatOpenAI, ChatOpenAI, StructuredOutputChatOpenAI

# Names re-exported by this package; every entry is imported above and is
# listed exactly once.
__all__ = [
    "ChatOpenAI",
    "AzureChatOpenAI",
    "ChatLLM",
    "EndpointChatLLM",
    "StructuredOutputChatOpenAI",
    "LCAnthropicChat",
    "LCGeminiChat",
    "LCCohereChat",
    "LCOllamaChat",
    "LCChatOpenAI",
    "LCAzureChatOpenAI",
    "LCChatMixin",
    "LlamaCppChat",
]


================================================
FILE: libs/kotaemon/kotaemon/llms/chats/base.py
================================================
from __future__ import annotations

import logging

from kotaemon.base import BaseComponent
from kotaemon.llms.base import BaseLLM

logger = logging.getLogger(__name__)


class ChatLLM(BaseLLM):
    """Base class of the chat LLM components."""

    def flow(self):
        """Call this LLM on the text produced by ``self.inflow``.

        Returns:
            The result of calling this component with ``self.inflow.flow().text``.

        Raises:
            ValueError: if ``self.inflow`` is None or is not a ``BaseComponent``.
        """
        if self.inflow is None:
            raise ValueError("No inflow provided.")

        if isinstance(self.inflow, BaseComponent):
            upstream_text = self.inflow.flow().text
            return self.__call__(upstream_text)

        raise ValueError(
            f"inflow must be a BaseComponent, found {type(self.inflow)}"
        )


================================================
FILE: libs/kotaemon/kotaemon/llms/chats/endpoint_based.py
================================================
import requests

from kotaemon.base import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    LLMInterface,
    Param,
    SystemMessage,
)

from .base import ChatLLM


class EndpointChatLLM(ChatLLM):
    """
    A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API
    compatible endpoint.

    Attributes:
        endpoint_url (str): The url of a OpenAI API compatible endpoint.
    """

    endpoint_url: str = Param(
        help="URL of the OpenAI API compatible endpoint", required=True
    )

    def run(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """
        Generate response from messages

        The messages are POSTed as JSON (``{"messages": [...]}``) to
        ``endpoint_url`` and the JSON answer is read with the OpenAI chat
        completion layout (``choices[i].message.content`` and ``usage``).

        Args:
            messages (str | BaseMessage | list[BaseMessage]): history of messages to
                generate response from. A plain string is sent as a single
                user message.
            **kwargs: accepted for interface compatibility; currently not sent
                to the endpoint.
        Returns:
            LLMInterface: generated response. ``candidates`` holds the non-empty
                contents of all choices and ``content`` the first of them, or
                an empty string when there is none.
        """
        if isinstance(messages, str):
            input_ = [HumanMessage(content=messages)]
        elif isinstance(messages, BaseMessage):
            input_ = [messages]
        else:
            input_ = messages

        def decide_role(message: BaseMessage):
            # anything that is neither a system nor an AI message is sent as "user"
            if isinstance(message, SystemMessage):
                return "system"
            elif isinstance(message, AIMessage):
                return "assistant"
            else:
                return "user"

        request_json = {
            "messages": [{"content": m.text, "role": decide_role(m)} for m in input_]
        }

        response = requests.post(self.endpoint_url, json=request_json).json()

        # keep only the choices that actually carry some text
        candidates = [
            each["message"]["content"]
            for each in (response["choices"] or [])
            if each["message"]["content"]
        ]
        # every choice can have an empty content (the list is then empty even
        # though "choices" is not), so do not index blindly
        content = candidates[0] if candidates else ""

        return LLMInterface(
            content=content,
            candidates=candidates,
            completion_tokens=response["usage"]["completion_tokens"],
            total_tokens=response["usage"]["total_tokens"],
            prompt_tokens=response["usage"]["prompt_tokens"],
        )

    def invoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Same as run"""
        return self.run(messages, **kwargs)

    async def ainvoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Async wrapper around ``invoke``; the HTTP request itself is synchronous."""
        return self.invoke(messages, **kwargs)


================================================
FILE: libs/kotaemon/kotaemon/llms/chats/langchain_based.py
================================================
from __future__ import annotations

import logging
from typing import AsyncGenerator, Iterator

from kotaemon.base import BaseMessage, HumanMessage, LLMInterface, Param

from .base import ChatLLM

logger = logging.getLogger(__name__)


class LCChatMixin:
    """Mixin for langchain based chat models

    The mixin wraps an instance of a Langchain chat class (``self._obj``) that
    is built from the keyword arguments given at construction time
    (``self._kwargs``). Attribute reads that fail on the wrapper fall back to
    those keyword arguments and then to the wrapped object; assigning an
    attribute that is a field of the Langchain class rebuilds the wrapped
    object with the new value.
    """

    def _get_lc_class(self):
        """Return the Langchain class to wrap; subclasses must override this."""
        raise NotImplementedError(
            "Please return the relevant Langchain class in in _get_lc_class"
        )

    def _get_tool_call_kwargs(self):
        """Return extra keyword arguments for the tool-calling ``invoke``."""
        return {}

    def __init__(self, stream: bool = False, **params):
        """Build the wrapped Langchain object.

        Args:
            stream: when True, ``run`` returns the ``stream`` iterator instead
                of the ``invoke`` result.
            **params: keyword arguments given to the Langchain class.
        """
        self._lc_class = self._get_lc_class()
        self._obj = self._lc_class(**params)
        self._kwargs: dict = params
        self._stream = stream

        super().__init__()

    def run(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Dispatch to ``stream`` or ``invoke`` depending on the ``stream`` flag."""
        if self._stream:
            return self.stream(messages, **kwargs)  # type: ignore
        return self.invoke(messages, **kwargs)

    def prepare_message(self, messages: str | BaseMessage | list[BaseMessage]):
        """Normalise the input into a list of messages.

        A string becomes a single ``HumanMessage``, a single message is wrapped
        in a list and anything else is returned unchanged.
        """
        input_: list[BaseMessage] = []

        if isinstance(messages, str):
            input_ = [HumanMessage(content=messages)]
        elif isinstance(messages, BaseMessage):
            input_ = [messages]
        else:
            input_ = messages

        return input_

    def prepare_response(self, pred):
        """Convert the result of Langchain's ``generate`` into an ``LLMInterface``.

        Only the generations of the first prompt (``pred.generations[0]``) are
        used. Token counts default to 0 when they cannot be read.
        """
        all_text = [each.text for each in pred.generations[0]]
        all_messages = [each.message for each in pred.generations[0]]

        completion_tokens, total_tokens, prompt_tokens = 0, 0, 0
        try:
            if pred.llm_output is not None:
                completion_tokens = pred.llm_output["token_usage"]["completion_tokens"]
                total_tokens = pred.llm_output["token_usage"]["total_tokens"]
                prompt_tokens = pred.llm_output["token_usage"]["prompt_tokens"]
        except Exception:
            # token usage is best effort: whatever could not be read stays 0
            pass

        return LLMInterface(
            text=all_text[0] if len(all_text) > 0 else "",
            candidates=all_text,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            prompt_tokens=prompt_tokens,
            messages=all_messages,
            logits=[],
        )

    def invoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Generate response from messages

        Args:
            messages: history of messages to generate response from
            **kwargs: additional arguments to pass to the langchain chat model.
                When ``tools_pydantic`` is present, the tools are bound to the
                model and only the resulting tool calls are returned (in
                ``additional_kwargs["tool_calls"]``, with an empty content);
                the other keyword arguments are not used in that case.

        Returns:
            LLMInterface: generated response
        """
        input_ = self.prepare_message(messages)

        if "tools_pydantic" in kwargs:
            tools = kwargs.pop("tools_pydantic")
            lc_tool_call = self._obj.bind_tools(tools)
            pred = lc_tool_call.invoke(
                input_,
                **self._get_tool_call_kwargs(),
            )
            if pred.tool_calls:
                tool_calls = pred.tool_calls
            else:
                tool_calls = pred.additional_kwargs.get("tool_calls", [])

            output = LLMInterface(
                content="",
                additional_kwargs={"tool_calls": tool_calls},
            )
        else:
            pred = self._obj.generate(messages=[input_], **kwargs)
            output = self.prepare_response(pred)

        return output

    async def ainvoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Async counterpart of ``invoke`` (without the tool-calling branch)."""
        input_ = self.prepare_message(messages)
        pred = await self._obj.agenerate(messages=[input_], **kwargs)
        return self.prepare_response(pred)

    def stream(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> Iterator[LLMInterface]:
        """Yield the response chunk by chunk.

        The input is normalised like in ``invoke`` so that a single message is
        accepted as well.
        """
        input_ = self.prepare_message(messages)
        for response in self._obj.stream(input=input_, **kwargs):
            yield LLMInterface(content=response.content)

    async def astream(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> AsyncGenerator[LLMInterface, None]:
        """Async counterpart of ``stream``."""
        input_ = self.prepare_message(messages)
        async for response in self._obj.astream(input=input_, **kwargs):
            yield LLMInterface(content=response.content)

    def to_langchain_format(self):
        """Return the wrapped Langchain object."""
        return self._obj

    def __repr__(self):
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = repr(value_obj)
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __str__(self):
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = str(value_obj)
            # keep long values (e.g. keys, urls) short in the printable form
            if len(value) > 20:
                value = f"{value[:15]}..."
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __setattr__(self, name, value):
        # `_lc_class` must be storable before it can be consulted below
        if name == "_lc_class":
            return super().__setattr__(name, value)

        if name in self._lc_class.__fields__:
            # a field of the Langchain class: record it and rebuild the object
            self._kwargs[name] = value
            self._obj = self._lc_class(**self._kwargs)
        else:
            super().__setattr__(name, value)

    def __getattr__(self, name):
        # This hook only runs when the normal lookup failed. If one of the
        # attributes it relies on is itself missing (instance created without
        # __init__, e.g. while being copied or unpickled), reading it below
        # would re-enter this method forever; fail with AttributeError instead.
        if name in ("_lc_class", "_obj", "_kwargs"):
            raise AttributeError(name)

        if name in self._kwargs:
            return self._kwargs[name]
        return getattr(self._obj, name)

    def dump(self, *args, **kwargs):
        """Return the class path and the serialized construction parameters."""
        from theflow.utils.modules import serialize

        params = {key: serialize(value) for key, value in self._kwargs.items()}
        return {
            "__type__": f"{self.__module__}.{self.__class__.__qualname__}",
            **params,
        }

    def specs(self, path: str):
        """Describe the parameter named ``path``.

        Raises:
            ValueError: if ``path`` is nested (contains ".") or is not a field
                of the Langchain class.
        """
        path = path.strip(".")
        if "." in path:
            raise ValueError("path should not contain '.'")

        if path in self._lc_class.__fields__:
            return {
                "__type__": "theflow.base.ParamAttr",
                "refresh_on_set": True,
                "strict_type": True,
            }

        raise ValueError(f"Invalid param {path}")


class LCChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
    """Chat model backed by Langchain's ``ChatOpenAI``."""

    def __init__(
        self,
        openai_api_base: str | None = None,
        openai_api_key: str | None = None,
        model: str | None = None,
        temperature: float = 0.7,
        request_timeout: float | None = None,
        **params,
    ):
        """Forward the named arguments and ``params`` to the mixin constructor."""
        lc_params = {
            "openai_api_base": openai_api_base,
            "openai_api_key": openai_api_key,
            "model": model,
            "temperature": temperature,
            "request_timeout": request_timeout,
            **params,
        }
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return ``ChatOpenAI``, preferring the ``langchain_openai`` package."""
        try:
            from langchain_openai import ChatOpenAI as lc_class
        except ImportError:
            # fall back to the class shipped in the main langchain package
            from langchain.chat_models import ChatOpenAI as lc_class

        return lc_class


class LCAzureChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
    """Chat model backed by Langchain's ``AzureChatOpenAI``."""

    def __init__(
        self,
        azure_endpoint: str | None = None,
        openai_api_key: str | None = None,
        openai_api_version: str = "",
        deployment_name: str | None = None,
        temperature: float = 0.7,
        request_timeout: float | None = None,
        **params,
    ):
        """Forward the named arguments and ``params`` to the mixin constructor."""
        lc_params = {
            "azure_endpoint": azure_endpoint,
            "openai_api_key": openai_api_key,
            "openai_api_version": openai_api_version,
            "deployment_name": deployment_name,
            "temperature": temperature,
            "request_timeout": request_timeout,
            **params,
        }
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return ``AzureChatOpenAI``, preferring the ``langchain_openai`` package."""
        try:
            from langchain_openai import AzureChatOpenAI as lc_class
        except ImportError:
            # fall back to the class shipped in the main langchain package
            from langchain.chat_models import AzureChatOpenAI as lc_class

        return lc_class


class LCAnthropicChat(LCChatMixin, ChatLLM):  # type: ignore
    """Chat model backed by ``langchain_anthropic.ChatAnthropic``."""

    api_key: str = Param(
        help="API key (https://console.anthropic.com/settings/keys)", required=True
    )
    model_name: str = Param(
        help=(
            "Model name to use "
            "(https://docs.anthropic.com/en/docs/about-claude/models)"
        ),
        required=True,
    )

    def _get_tool_call_kwargs(self):
        """Return the ``tool_choice`` setting used for tool-calling requests."""
        tool_choice = {"type": "any"}
        return {"tool_choice": tool_choice}

    def __init__(
        self,
        api_key: str | None = None,
        model_name: str | None = None,
        temperature: float = 0.7,
        **params,
    ):
        """Forward the named arguments and ``params`` to the mixin constructor."""
        lc_params = {
            "api_key": api_key,
            "model_name": model_name,
            "temperature": temperature,
            **params,
        }
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return ``ChatAnthropic``.

        Raises:
            ImportError: if ``langchain_anthropic`` cannot be imported.
        """
        try:
            from langchain_anthropic import ChatAnthropic as lc_class
        except ImportError:
            raise ImportError("Please install langchain-anthropic")

        return lc_class


class LCGeminiChat(LCChatMixin, ChatLLM):  # type: ignore
    """Chat model backed by ``langchain_google_genai.ChatGoogleGenerativeAI``."""

    api_key: str = Param(
        help="API key (https://aistudio.google.com/app/apikey)", required=True
    )
    model_name: str = Param(
        help=(
            "Model name to use (https://cloud.google"
            ".com/vertex-ai/generative-ai/docs/learn/models)"
        ),
        required=True,
    )

    def _get_tool_call_kwargs(self):
        """Return the ``tool_config`` setting used for tool-calling requests."""
        function_calling_config = {
            "mode": "ANY",
        }
        return {"tool_config": {"function_calling_config": function_calling_config}}

    def __init__(
        self,
        api_key: str | None = None,
        model_name: str | None = None,
        temperature: float = 0.7,
        **params,
    ):
        """Build the Langchain object.

        ``api_key`` and ``model_name`` are passed on under the names
        ``google_api_key`` and ``model``.
        """
        lc_params = {
            "google_api_key": api_key,
            "model": model_name,
            "temperature": temperature,
            **params,
        }
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return ``ChatGoogleGenerativeAI``.

        Raises:
            ImportError: if ``langchain_google_genai`` cannot be imported.
        """
        try:
            from langchain_google_genai import ChatGoogleGenerativeAI as lc_class
        except ImportError:
            raise ImportError("Please install langchain-google-genai")

        return lc_class


class LCCohereChat(LCChatMixin, ChatLLM):  # type: ignore
    """Chat model backed by ``langchain_cohere.ChatCohere``."""

    api_key: str = Param(
        help="API key (https://dashboard.cohere.com/api-keys)", required=True
    )
    model_name: str = Param(
        help=("Model name to use (https://dashboard.cohere.com/playground/chat)"),
        required=True,
    )

    def __init__(
        self,
        api_key: str | None = None,
        model_name: str | None = None,
        temperature: float = 0.7,
        **params,
    ):
        """Build the Langchain object.

        ``api_key`` is passed on under the name ``cohere_api_key``.
        """
        lc_params = {
            "cohere_api_key": api_key,
            "model_name": model_name,
            "temperature": temperature,
            **params,
        }
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return ``ChatCohere``.

        Raises:
            ImportError: if ``langchain_cohere`` cannot be imported.
        """
        try:
            from langchain_cohere import ChatCohere as lc_class
        except ImportError:
            raise ImportError("Please install langchain-cohere")

        return lc_class


class LCOllamaChat(LCChatMixin, ChatLLM):  # type: ignore
    """Chat model backed by ``langchain_ollama.ChatOllama``."""

    base_url: str = Param(
        help="Base Ollama URL. (default: http://localhost:11434/api/)",  # noqa
        required=True,
    )
    model: str = Param(
        help="Model name to use (https://ollama.com/library)",
        required=True,
    )
    num_ctx: int = Param(
        help="The size of the context window (default: 8192)",
        required=True,
    )

    def __init__(
        self,
        model: str | None = None,
        base_url: str | None = None,
        num_ctx: int | None = None,
        **params,
    ):
        """Forward the named arguments and ``params`` to the mixin constructor."""
        lc_params = {
            "base_url": base_url,
            "model": model,
            "num_ctx": num_ctx,
            **params,
        }
        super().__init__(**lc_params)

    def _get_lc_class(self):
        """Return ``ChatOllama``.

        Raises:
            ImportError: if ``langchain_ollama`` cannot be imported.
        """
        try:
            from langchain_ollama import ChatOllama as lc_class
        except ImportError:
            raise ImportError("Please install langchain-ollama")

        return lc_class


================================================
FILE: libs/kotaemon/kotaemon/llms/chats/llamacpp.py
================================================
from typing import TYPE_CHECKING, Iterator, Optional, cast

from kotaemon.base import BaseMessage, HumanMessage, LLMInterface, Param

from .base import ChatLLM

if TYPE_CHECKING:
    from llama_cpp import CreateChatCompletionResponse as CCCR
    from llama_cpp import Llama


class LlamaCppChat(ChatLLM):
    """Wrapper around the llama-cpp-python's Llama model

    The model is loaded either from a local file (`model_path`) or from the
    HuggingFace Hub (`repo_id` together with `filename`).
    """

    model_path: Optional[str] = Param(
        help="Path to the model file. This is required to load the model.",
    )
    repo_id: Optional[str] = Param(
        help="Id of a repo on the HuggingFace Hub in the form of `user_name/repo_name`."
    )
    filename: Optional[str] = Param(
        help="A filename or glob pattern to match the model file in the repo."
    )
    # NOTE(review): the help text says a blank value is auto-inferred, but
    # `client_object` below rejects a blank `chat_format` with a ValueError.
    chat_format: str = Param(
        help=(
            "Chat format to use. Please refer to llama_cpp.llama_chat_format for a "
            "list of supported formats. If blank, the chat format will be auto-"
            "inferred."
        ),
        required=True,
    )
    lora_base: Optional[str] = Param(None, help="Path to the base Lora model")
    n_ctx: Optional[int] = Param(512, help="Text context, 0 = from model")
    n_gpu_layers: Optional[int] = Param(
        0,
        help="Number of layers to offload to GPU. If -1, all layers are offloaded",
    )
    use_mmap: Optional[bool] = Param(
        True,
        help="Use mmap if possible.",
    )
    vocab_only: Optional[bool] = Param(
        False,
        help="If True, only the vocabulary is loaded. This is useful for debugging.",
    )

    # Maps the message `type` of this library to the chat role name sent to
    # llama-cpp-python.
    _role_mapper: dict[str, str] = {
        "human": "user",
        "system": "system",
        "ai": "assistant",
    }

    @Param.auto()
    def client_object(self) -> "Llama":
        """Get the llama-cpp-python client object

        Raises:
            ImportError: if `llama_cpp` cannot be imported.
            ValueError: if neither `model_path` nor the `repo_id`/`filename`
                pair is set, or if `chat_format` is blank. All problems are
                reported together, one per line.
        """
        try:
            from llama_cpp import Llama
        except ImportError as exc:
            raise ImportError(
                "llama-cpp-python is not installed. "
                "Please install it using `pip install llama-cpp-python`"
            ) from exc

        errors = []
        if not self.model_path and (not self.repo_id or not self.filename):
            errors.append(
                "- `model_path` or `repo_id` and `filename` are required to load the"
                " model"
            )

        if not self.chat_format:
            errors.append(
                "- `chat_format` is required to know how to format the chat messages. "
                "Please refer to llama_cpp.llama_chat_format for a list of supported "
                "formats."
            )
        if errors:
            raise ValueError("\n".join(errors))

        # Options shared by both ways of loading the model.
        common_kwargs = {
            "chat_format": self.chat_format,
            "lora_base": self.lora_base,
            "n_ctx": self.n_ctx,
            "n_gpu_layers": self.n_gpu_layers,
            "use_mmap": self.use_mmap,
            "vocab_only": self.vocab_only,
        }

        # A local file takes precedence over the Hub location.
        if self.model_path:
            return Llama(model_path=cast(str, self.model_path), **common_kwargs)

        return Llama.from_pretrained(
            repo_id=self.repo_id,
            filename=self.filename,
            **common_kwargs,
        )

    def prepare_message(
        self, messages: str | BaseMessage | list[BaseMessage]
    ) -> list[dict]:
        """Convert the input into a list of `{"role", "content"}` dicts.

        A plain string is treated as a single human message and a single
        message is wrapped into a list.

        Raises:
            KeyError: if a message type has no entry in `_role_mapper`.
        """
        input_: list[BaseMessage] = []

        if isinstance(messages, str):
            input_ = [HumanMessage(content=messages)]
        elif isinstance(messages, BaseMessage):
            input_ = [messages]
        else:
            input_ = messages

        return [
            {"role": self._role_mapper[each.type], "content": each.content}
            for each in input_
        ]

    def invoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Run a non-streaming chat completion.

        `**kwargs` is accepted but not forwarded to the client.

        Returns:
            LLMInterface holding the first choice as `content` (empty string
            when there is no choice or the choice has no text), every
            non-empty choice in `candidates`, and the token usage counters.
        """
        pred: "CCCR" = self.client_object.create_chat_completion(
            messages=self.prepare_message(messages),
            stream=False,
        )
        choices = pred["choices"]

        return LLMInterface(
            # The message content may be None; normalize it to an empty string.
            content=(choices[0]["message"]["content"] or "") if choices else "",
            candidates=[
                c["message"]["content"] for c in choices if c["message"]["content"]
            ],
            completion_tokens=pred["usage"]["completion_tokens"],
            total_tokens=pred["usage"]["total_tokens"],
            prompt_tokens=pred["usage"]["prompt_tokens"],
        )

    def stream(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> Iterator[LLMInterface]:
        """Run a streaming chat completion, yielding one item per text delta.

        Chunks without choices, or whose delta carries no `content` key, are
        skipped. `**kwargs` is accepted but not forwarded to the client.
        """
        pred = self.client_object.create_chat_completion(
            messages=self.prepare_message(messages),
            stream=True,
        )
        for chunk in pred:
            if not chunk["choices"]:
                continue

            delta = chunk["choices"][0]["delta"]
            if "content" not in delta:
                continue

            yield LLMInterface(content=delta["content"])


================================================
FILE: libs/kotaemon/kotaemon/llms/chats/openai.py
================================================
from typing import TYPE_CHECKING, AsyncGenerator, Iterator, Optional, Type

from pydantic import BaseModel
from theflow.utils.modules import import_dotted_string

from kotaemon.base import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    LLMInterface,
    Param,
    StructuredOutputLLMInterface,
)

from .base import ChatLLM

if TYPE_CHECKING:
    from openai.types.chat.chat_completion_message_param import (
        ChatCompletionMessageParam,
    )


class BaseChatOpenAI(ChatLLM):
    """Base interface for OpenAI chat model, using the openai library

    This class exposes the parameters in resources.Chat. To subclass this class:

        - Implement the `prepare_client` method to return the OpenAI client
        - Implement the `openai_response` method to return the OpenAI response
        - Implement the `aopenai_response` method, the awaitable counterpart
          used by `ainvoke` and `astream`
        - Implement the params relate to the OpenAI client
    """

    _dependencies = ["openai"]
    _capabilities = ["chat", "text"]  # consider as mixin

    api_key: str = Param(help="API key", required=True)
    timeout: Optional[float] = Param(None, help="Timeout for the API request")
    max_retries: Optional[int] = Param(
        None, help="Maximum number of retries for the API request"
    )

    temperature: Optional[float] = Param(
        None,
        help=(
            "Number between 0 and 2 that controls the randomness of the generated "
            "tokens. Lower values make the model more deterministic, while higher "
            "values make the model more random."
        ),
    )
    max_tokens: Optional[int] = Param(
        None,
        help=(
            "Maximum number of tokens to generate. The total length of input tokens "
            "and generated tokens is limited by the model's context length."
        ),
    )
    n: int = Param(
        1,
        help=(
            "Number of completions to generate. The API will generate n completion "
            "for each prompt."
        ),
    )
    stop: Optional[str | list[str]] = Param(
        None,
        help=(
            "Stop sequence. If a stop sequence is detected, generation will stop "
            "at that point. If not specified, generation will continue until the "
            "maximum token length is reached."
        ),
    )
    frequency_penalty: Optional[float] = Param(
        None,
        help=(
            "Number between -2.0 and 2.0. Positive values penalize new tokens "
            "based on their existing frequency in the text so far, decrearsing the "
            "model's likelihood of repeating the same text."
        ),
    )
    presence_penalty: Optional[float] = Param(
        None,
        help=(
            "Number between -2.0 and 2.0. Positive values penalize new tokens "
            "based on their existing presence in the text so far, decrearsing the "
            "model's likelihood of repeating the same text."
        ),
    )
    tool_choice: Optional[str] = Param(
        None,
        help=(
            "Choice of tool to use for the completion. Available choices are: "
            "auto, default."
        ),
    )
    tools: Optional[list[str]] = Param(
        None,
        help="List of tools to use for the completion.",
    )
    logprobs: Optional[bool] = Param(
        None,
        help=(
            "Include log probabilities on the logprobs most likely tokens, "
            "as well as the chosen token."
        ),
    )
    logit_bias: Optional[dict] = Param(
        None,
        help=(
            "Dictionary of logit bias values to add to the logits of the tokens "
            "in the vocabulary."
        ),
    )
    top_logprobs: Optional[int] = Param(
        None,
        help=(
            "An integer between 0 and 5 specifying the number of most likely tokens "
            "to return at each token position, each with an associated log "
            "probability. `logprobs` must also be set to `true` if this parameter "
            "is used."
        ),
    )
    top_p: Optional[float] = Param(
        None,
        help=(
            "An alternative to sampling with temperature, called nucleus sampling, "
            "where the model considers the results of the token with top_p "
            "probability mass. So 0.1 means that only the tokens comprising the "
            "top 10% probability mass are considered."
        ),
    )

    @Param.auto(depends_on=["max_retries"])
    def max_retries_(self):
        """Effective retry count: `max_retries`, or the openai default if unset"""
        if self.max_retries is None:
            from openai._constants import DEFAULT_MAX_RETRIES

            return DEFAULT_MAX_RETRIES
        return self.max_retries

    def prepare_message(
        self, messages: str | BaseMessage | list[BaseMessage]
    ) -> list["ChatCompletionMessageParam"]:
        """Prepare the message into OpenAI format

        A plain string is treated as a single human message and a single
        message is wrapped into a list.

        Returns:
            list[dict]: List of messages in OpenAI format
        """
        input_: list[BaseMessage] = []

        if isinstance(messages, str):
            input_ = [HumanMessage(content=messages)]
        elif isinstance(messages, BaseMessage):
            input_ = [messages]
        else:
            input_ = messages

        output_: list["ChatCompletionMessageParam"] = [
            message.to_openai_format() for message in input_
        ]
        return output_

    def prepare_output(self, resp: dict) -> LLMInterface:
        """Convert the OpenAI response into LLMInterface

        Args:
            resp: the response as a plain dict. The first choice supplies the
                content, tool calls and logprobs; every choice supplies one
                candidate and one message. Missing text becomes "".
        """
        first_choice = resp["choices"][0]

        additional_kwargs = {}
        if "tool_calls" in first_choice["message"]:
            additional_kwargs["tool_calls"] = first_choice["message"]["tool_calls"]

        if first_choice.get("logprobs") is None:
            logprobs = []
        else:
            all_logprobs = first_choice["logprobs"].get("content")
            logprobs = (
                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
            )

        output = LLMInterface(
            candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
            content=first_choice["message"]["content"] or "",
            total_tokens=resp["usage"]["total_tokens"],
            prompt_tokens=resp["usage"]["prompt_tokens"],
            completion_tokens=resp["usage"]["completion_tokens"],
            additional_kwargs=additional_kwargs,
            messages=[
                AIMessage(content=(_["message"]["content"]) or "")
                for _ in resp["choices"]
            ],
            logprobs=logprobs,
        )

        return output

    def prepare_client(self, async_version: bool = False):
        """Get the OpenAI client

        Args:
            async_version (bool): Whether to get the async version of the client
        """
        raise NotImplementedError

    def openai_response(self, client, **kwargs):
        """Get the openai response"""
        raise NotImplementedError

    async def aopenai_response(self, client, **kwargs):
        """Get the openai response (awaitable version, for the async client)"""
        raise NotImplementedError

    def invoke(
        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs
    ) -> LLMInterface:
        """Run a non-streaming completion; `**kwargs` go to `openai_response`"""
        client = self.prepare_client(async_version=False)
        input_messages = self.prepare_message(messages)
        resp = self.openai_response(
            client, messages=input_messages, stream=False, **kwargs
        ).dict()
        return self.prepare_output(resp)

    async def ainvoke(
        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs
    ) -> LLMInterface:
        """Async version of `invoke`; `**kwargs` go to `aopenai_response`"""
        client = self.prepare_client(async_version=True)
        input_messages = self.prepare_message(messages)
        resp = (
            await self.aopenai_response(
                client, messages=input_messages, stream=False, **kwargs
            )
        ).dict()

        return self.prepare_output(resp)

    def stream(
        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs
    ) -> Iterator[LLMInterface]:
        """Run a streaming completion, yielding one item per text delta.

        Chunks without choices, or whose delta content is None, are skipped.
        Each yielded item carries the delta text and the logprobs reported
        for that chunk (an empty list when there are none).
        """
        client = self.prepare_client(async_version=False)
        input_messages = self.prepare_message(messages)
        resp = self.openai_response(
            client, messages=input_messages, stream=True, **kwargs
        )

        for c in resp:
            chunk = c.dict()
            if not chunk["choices"]:
                continue

            choice = chunk["choices"][0]
            content = choice["delta"]["content"]
            if content is None:
                continue

            chunk_logprobs = choice.get("logprobs")
            if chunk_logprobs is None:
                logprobs = []
            else:
                # "content" may be present with a None value, so a `.get`
                # default is not enough to guarantee something iterable.
                logprobs = [
                    logprob["logprob"]
                    for logprob in (chunk_logprobs.get("content") or [])
                ]

            yield LLMInterface(content=content, logprobs=logprobs)

    async def astream(
        self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs
    ) -> AsyncGenerator[LLMInterface, None]:
        """Async version of `stream`, yielding one item per text delta.

        Chunks without choices, or whose delta content is None, are skipped.
        """
        client = self.prepare_client(async_version=True)
        input_messages = self.prepare_message(messages)
        # The async client must be driven through the awaitable
        # `aopenai_response`; the sync `openai_response` would hand back an
        # un-awaited coroutine, which cannot be iterated with `async for`.
        resp = await self.aopenai_response(
            client, messages=input_messages, stream=True, **kwargs
        )

        async for chunk in resp:
            if not chunk.choices:
                continue
            if chunk.choices[0].delta.content is not None:
                yield LLMInterface(content=chunk.choices[0].delta.content)


class ChatOpenAI(BaseChatOpenAI):
    """OpenAI chat model"""

    base_url: Optional[str] = Param(None, help="OpenAI base URL")
    organization: Optional[str] = Param(None, help="OpenAI organization")
    model: str = Param(help="OpenAI model", required=True)

    def prepare_client(self, async_version: bool = False):
        """Build the openai client from this component's connection params.

        Args:
            async_version (bool): return `AsyncOpenAI` when true, `OpenAI`
                otherwise
        """
        client_kwargs = {
            "api_key": self.api_key,
            "organization": self.organization,
            "base_url": self.base_url,
            "timeout": self.timeout,
            "max_retries": self.max_retries_,
        }

        if not async_version:
            from openai import OpenAI

            return OpenAI(**client_kwargs)

        from openai import AsyncOpenAI

        return AsyncOpenAI(**client_kwargs)

    def prepare_params(self, **kwargs):
        """Assemble the keyword arguments for the chat completion request.

        Component params that are None are left out. Call-time `kwargs` are
        applied last, so they override the component params; the
        `tools_pydantic` kwarg is dropped.
        """
        kwargs.pop("tools_pydantic", None)

        configured = {
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "n": self.n,
            "stop": self.stop,
            "frequency_penalty": self.frequency_penalty,
            "presence_penalty": self.presence_penalty,
            "tool_choice": self.tool_choice,
            "tools": self.tools,
            "logprobs": self.logprobs,
            "logit_bias": self.logit_bias,
            "top_logprobs": self.top_logprobs,
            "top_p": self.top_p,
        }
        request = {
            name: value for name, value in configured.items() if value is not None
        }

        return {**request, **kwargs}

    def openai_response(self, client, **kwargs):
        """Send the chat completion request with the sync client"""
        request = self.prepare_params(**kwargs)
        return client.chat.completions.create(**request)

    async def aopenai_response(self, client, **kwargs):
        """Send the chat completion request with the async client"""
        request = self.prepare_params(**kwargs)
        return await client.chat.completions.create(**request)


class StructuredOutputChatOpenAI(ChatOpenAI):
    """OpenAI chat model that returns structured output

    Requests go through `client.beta.chat.completions.parse` with
    `response_schema` as the response format. Streaming is not supported:
    the `stream` argument is removed before the request is sent.
    """

    response_schema: Type[BaseModel] = Param(
        help="class that subclasses pydantics BaseModel", required=True
    )

    def prepare_output(self, resp: dict) -> StructuredOutputLLMInterface:
        """Convert the OpenAI response into StructuredOutputLLMInterface

        Args:
            resp: the response as a plain dict. The first choice supplies the
                parsed object, content, tool calls and logprobs; every choice
                supplies one candidate and one message.
        """
        first_choice = resp["choices"][0]

        additional_kwargs = {}
        if "tool_calls" in first_choice["message"]:
            additional_kwargs["tool_calls"] = first_choice["message"]["tool_calls"]

        if first_choice.get("logprobs") is None:
            logprobs = []
        else:
            all_logprobs = first_choice["logprobs"].get("content")
            logprobs = (
                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
            )

        output = StructuredOutputLLMInterface(
            parsed=first_choice["message"]["parsed"],
            candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
            content=first_choice["message"]["content"] or "",
            total_tokens=resp["usage"]["total_tokens"],
            prompt_tokens=resp["usage"]["prompt_tokens"],
            completion_tokens=resp["usage"]["completion_tokens"],
            messages=[
                AIMessage(content=(_["message"]["content"]) or "")
                for _ in resp["choices"]
            ],
            additional_kwargs=additional_kwargs,
            logprobs=logprobs,
        )

        return output

    def prepare_params(self, **kwargs):
        """Assemble the keyword arguments for the `parse` request.

        Starts from the parent's parameters, adds `response_schema` as
        `response_format` unless the caller supplied one, and removes
        `stream`.
        """
        params = super().prepare_params(**kwargs)

        # Call-time kwargs win over the component param, as for every other
        # entry built by the parent.
        if self.response_schema is not None:
            params.setdefault("response_format", self.response_schema)

        # doesn't do streaming; tolerate callers that never passed `stream`
        params.pop("stream", None)

        return params

    def openai_response(self, client, **kwargs):
        """Get the openai response"""
        params = self.prepare_params(**kwargs)

        return client.beta.chat.completions.parse(**params)

    async def aopenai_response(self, client, **kwargs):
        """Get the openai response (awaitable version, for the async client)"""
        params = self.prepare_params(**kwargs)

        return await client.beta.chat.completions.parse(**params)


class AzureChatOpenAI(BaseChatOpenAI):
    """OpenAI chat model provided by Microsoft Azure"""

    azure_endpoint: str = Param(
        help=(
            "HTTPS endpoint for the Azure OpenAI model. The azure_endpoint, "
            "azure_deployment, and api_version parameters are used to construct "
            "the full URL for the Azure OpenAI model."
        ),
        required=True,
    )
    azure_deployment: str = Param(help="Azure deployment name", required=True)
    api_version: str = Param(help="Azure model version", required=True)
    azure_ad_token: Optional[str] = Param(None, help="Azure AD token")
    azure_ad_token_provider: Optional[str] = Param(None, help="Azure AD token provider")

    @Param.auto(depends_on=["azure_ad_token_provider"])
    def azure_ad_token_provider_(self):
        """Resolve `azure_ad_token_provider` from a dotted import path.

        Returns None when the param is not a string.
        """
        if not isinstance(self.azure_ad_token_provider, str):
            return None

        return import_dotted_string(self.azure_ad_token_provider, safe=False)

    def prepare_client(self, async_version: bool = False):
        """Build the Azure openai client from this component's connection params.

        Args:
            async_version (bool): return `AsyncAzureOpenAI` when true,
                `AzureOpenAI` otherwise
        """
        client_kwargs = {
            "azure_endpoint": self.azure_endpoint,
            "api_version": self.api_version,
            "api_key": self.api_key,
            "azure_ad_token": self.azure_ad_token,
            "azure_ad_token_provider": self.azure_ad_token_provider_,
            "timeout": self.timeout,
            "max_retries": self.max_retries_,
        }

        if not async_version:
            from openai import AzureOpenAI

            return AzureOpenAI(**client_kwargs)

        from openai import AsyncAzureOpenAI

        return AsyncAzureOpenAI(**client_kwargs)

    def prepare_params(self, **kwargs):
        """Assemble the keyword arguments for the chat completion request.

        The deployment name is sent as `model`. Component params that are
        None are left out. Call-time `kwargs` are applied last, so they
        override the component params; the `tools_pydantic` kwarg is dropped.
        """
        kwargs.pop("tools_pydantic", None)

        configured = {
            "model": self.azure_deployment,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "n": self.n,
            "stop": self.stop,
            "frequency_penalty": self.frequency_penalty,
            "presence_penalty": self.presence_penalty,
            "tool_choice": self.tool_choice,
            "tools": self.tools,
            "logprobs": self.logprobs,
            "logit_bias": self.logit_bias,
            "top_logprobs": self.top_logprobs,
            "top_p": self.top_p,
        }
        request = {
            name: value for name, value in configured.items() if value is not None
        }

        return {**request, **kwargs}

    def openai_response(self, client, **kwargs):
        """Send the chat completion request with the sync client"""
        request = self.prepare_params(**kwargs)
        return client.chat.completions.create(**request)

    async def aopenai_response(self, client, **kwargs):
        """Send the chat completion request with the async client"""
        request = self.prepare_params(**kwargs)
        return await client.chat.completions.create(**request)


================================================
FILE: libs/kotaemon/kotaemon/llms/completions/__init__.py
================================================
from .base import LLM
from .langchain_based import AzureOpenAI, LCCompletionMixin, LlamaCpp, OpenAI

__all__ = ["LLM", "OpenAI", "AzureOpenAI", "LCCompletionMixin", "LlamaCpp"]


================================================
FILE: libs/kotaemon/kotaemon/llms/completions/base.py
================================================
from kotaemon.llms.base import BaseLLM


class LLM(BaseLLM):
    """Base class of the completion LLMs; adds nothing on top of `BaseLLM`."""


================================================
FILE: libs/kotaemon/kotaemon/llms/completions/langchain_based.py
================================================
import logging
from typing import Optional

from kotaemon.base import LLMInterface

from .base import LLM

logger = logging.getLogger(__name__)


class LCCompletionMixin:
    """Mixin that wraps a Langchain completion LLM class.

    Subclasses return the Langchain class from `_get_lc_class`. The mixin
    instantiates it with the constructor keyword arguments and keeps three
    bookkeeping attributes:

        - `_lc_class`: the Langchain class
        - `_obj`: the Langchain instance
        - `_kwargs`: the keyword arguments `_obj` was built from

    Attribute reads fall back to `_kwargs`, then to `_obj`. Setting an
    attribute named after a field of the Langchain class updates `_kwargs`
    and rebuilds `_obj`.
    """

    def _get_lc_class(self):
        raise NotImplementedError(
            "Please return the relevant Langchain class in in _get_lc_class"
        )

    def __init__(self, **params):
        # `_lc_class` must be set first: `__setattr__` consults it for every
        # other attribute assignment.
        self._lc_class = self._get_lc_class()
        self._obj = self._lc_class(**params)
        self._kwargs: dict = params

        super().__init__()

    def run(self, text: str) -> "LLMInterface":
        """Generate a completion for `text` with the wrapped Langchain object.

        Returns:
            LLMInterface whose text is the first generation (empty string if
            there is none), with all generations as candidates. Token counts
            stay 0 when they cannot be read from the Langchain output.
        """
        pred = self._obj.generate([text])
        all_text = [each.text for each in pred.generations[0]]

        completion_tokens, total_tokens, prompt_tokens = 0, 0, 0
        try:
            if pred.llm_output is not None:
                completion_tokens = pred.llm_output["token_usage"]["completion_tokens"]
                total_tokens = pred.llm_output["token_usage"]["total_tokens"]
                prompt_tokens = pred.llm_output["token_usage"]["prompt_tokens"]
        except Exception:
            # Best effort: token usage is optional, so only warn.
            logger.warning(
                f"Cannot get token usage from LLM output for {self._lc_class.__name__}"
            )

        return LLMInterface(
            text=all_text[0] if len(all_text) > 0 else "",
            candidates=all_text,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            prompt_tokens=prompt_tokens,
            logits=[],
        )

    def to_langchain_format(self):
        """Return the wrapped Langchain object"""
        return self._obj

    def __repr__(self):
        kwargs_repr = ", ".join(
            f"{key}={value_obj!r}" for key, value_obj in self._kwargs.items()
        )
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __str__(self):
        kwargs = []
        for key, value_obj in self._kwargs.items():
            value = str(value_obj)
            # Keep the summary short: long values are cut to 15 characters.
            if len(value) > 20:
                value = f"{value[:15]}..."
            kwargs.append(f"{key}={value}")
        kwargs_repr = ", ".join(kwargs)
        return f"{self.__class__.__name__}({kwargs_repr})"

    def __setattr__(self, name, value):
        if name == "_lc_class":
            return super().__setattr__(name, value)

        if name in self._lc_class.__fields__:
            # A Langchain field changed: record it and rebuild the wrapped
            # object from the full set of keyword arguments.
            self._kwargs[name] = value
            self._obj = self._lc_class(**self._kwargs)
        else:
            super().__setattr__(name, value)

    def __getattr__(self, name):
        # This hook only runs when normal lookup has already failed. If one of
        # the bookkeeping attributes itself is missing (e.g. the instance was
        # created through `__new__` without `__init__`), reading it through
        # `self.` below would re-enter this method until RecursionError.
        if name in ("_lc_class", "_obj", "_kwargs"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )

        if name in self._kwargs:
            return self._kwargs[name]
        return getattr(self._obj, name)

    def dump(self, *args, **kwargs):
        """Return a dict with the dotted class path and the serialized kwargs"""
        from theflow.utils.modules import serialize

        params = {key: serialize(value) for key, value in self._kwargs.items()}
        return {
            "__type__": f"{self.__module__}.{self.__class__.__qualname__}",
            **params,
        }

    def specs(self, path: str):
        """Describe the param called `path`.

        Leading and trailing dots are stripped before the lookup.

        Raises:
            ValueError: if `path` still contains a dot, or does not name a
                field of the Langchain class.
        """
        path = path.strip(".")
        if "." in path:
            raise ValueError("path should not contain '.'")

        if path in self._lc_class.__fields__:
            return {
                "__type__": "theflow.base.ParamAttr",
                "refresh_on_set": True,
                "strict_type": True,
            }

        raise ValueError(f"Invalid param {path}")


class OpenAI(LCCompletionMixin, LLM):
    """Wrapper around Langchain's OpenAI class, focusing on key parameters"""

    def __init__(
        self,
        openai_api_key: Optional[str] = None,
        openai_api_base: Optional[str] = None,
        model_name: str = "text-davinci-003",
        temperature: float = 0.7,
        max_tokens: int = 256,
        top_p: float = 1,
        frequency_penalty: float = 0,
        n: int = 1,
        best_of: int = 1,
        request_timeout: Optional[float] = None,
        max_retries: int = 2,
        streaming: bool = False,
        **params,
    ):
        """Pass the named settings, then any extra `params`, to the mixin"""
        lc_kwargs = {
            "openai_api_key": openai_api_key,
            "openai_api_base": openai_api_base,
            "model_name": model_name,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "n": n,
            "best_of": best_of,
            "request_timeout": request_timeout,
            "max_retries": max_retries,
            "streaming": streaming,
        }
        lc_kwargs.update(params)
        super().__init__(**lc_kwargs)

    def _get_lc_class(self):
        """Return Langchain's OpenAI class.

        `langchain_openai` is tried first, with `langchain.llms` as fallback.
        """
        try:
            from langchain_openai import OpenAI as lc_class
        except ImportError:
            from langchain.llms import OpenAI as lc_class

        return lc_class


class AzureOpenAI(LCCompletionMixin, LLM):
    """Wrapper around Langchain's AzureOpenAI class, focusing on key parameters"""

    def __init__(
        self,
        azure_endpoint: Optional[str] = None,
        deployment_name: Optional[str] = None,
        openai_api_version: str = "",
        openai_api_key: Optional[str] = None,
        model_name: str = "text-davinci-003",
        temperature: float = 0.7,
        max_tokens: int = 256,
        top_p: float = 1,
        frequency_penalty: float = 0,
        n: int = 1,
        best_of: int = 1,
        request_timeout: Optional[float] = None,
        max_retries: int = 2,
        streaming: bool = False,
        **params,
    ):
        """Pass the named settings, then any extra `params`, to the mixin"""
        lc_kwargs = {
            "azure_endpoint": azure_endpoint,
            "deployment_name": deployment_name,
            "openai_api_version": openai_api_version,
            "openai_api_key": openai_api_key,
            "model_name": model_name,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "n": n,
            "best_of": best_of,
            "request_timeout": request_timeout,
            "max_retries": max_retries,
            "streaming": streaming,
        }
        lc_kwargs.update(params)
        super().__init__(**lc_kwargs)

    def _get_lc_class(self):
        """Return Langchain's AzureOpenAI class.

        `langchain_openai` is tried first, with `langchain.llms` as fallback.
        """
        try:
            from langchain_openai import AzureOpenAI as lc_class
        except ImportError:
            from langchain.llms import AzureOpenAI as lc_class

        return lc_class


class LlamaCpp(LCCompletionMixin, LLM):
    """Wrapper around Langchain's LlamaCpp class, focusing on key parameters"""

    def __init__(
        self,
        model_path: str,
        lora_base: Optional[str] = None,
        n_ctx: int = 512,
        n_gpu_layers: Optional[int] = None,
        use_mmap: bool = True,
        **params,
    ):
        """Pass the named settings, then any extra `params`, to the mixin"""
        lc_kwargs = {
            "model_path": model_path,
            "lora_base": lora_base,
            "n_ctx": n_ctx,
            "n_gpu_layers": n_gpu_layers,
            "use_mmap": use_mmap,
        }
        lc_kwargs.update(params)
        super().__init__(**lc_kwargs)

    def _get_lc_class(self):
        """Return Langchain's LlamaCpp class.

        `langchain_community.llms` is tried first, with `langchain.llms` as
        fallback.
        """
        try:
            from langchain_community.llms import LlamaCpp as lc_class
        except ImportError:
            from langchain.llms import LlamaCpp as lc_class

        return lc_class


================================================
FILE: libs/kotaemon/kotaemon/llms/cot.py
================================================
from copy import deepcopy
from typing import Callable, List

from theflow import Function, Node, Param

from kotaemon.base import BaseComponent, Document

from .chats import LCAzureChatOpenAI
from .completions import LLM
from .prompts import BasePromptComponent


class Thought(BaseComponent):
    """A single step of a chain of thought

    - Input: `**kwargs` pairs; each key names a placeholder in the prompt and
    each value is what gets substituted for it.
    - Output: a `Document` wrapping whatever the post-processor returned.

    _**Usage:**_

    Build and execute one thought:

    ```python
    >> from kotaemon.llms.cot import Thought
    >> thought = Thought(
         prompt="How to {action} {object}?",
         llm=LCAzureChatOpenAI(...),
         post_process=lambda string: {"tutorial": string},
       )
    >> output = thought(action="install", object="python")
    >> print(output)
    {'tutorial': 'As an AI language model,...'}
    ```

    Running a thought performs three steps:

    1. Fill the prompt template with the given `**kwargs`.
    2. Send the filled prompt to the LLM.
    3. Pass the LLM's text through the post-processor.

    Thoughts can be chained sequentially with the `+` operator:

    ```python
    >> llm = LCAzureChatOpenAI(...)
    >> thought1 = Thought(
           prompt="Word {word} in {language} is ",
           llm=llm,
           post_process=lambda string: {"translated": string},
       )
    >> thought2 = Thought(
            prompt="Translate {translated} to Japanese",
            llm=llm,
            post_process=lambda string: {"output": string},
       )

    >> thought = thought1 + thought2
    >> thought(word="hello", language="French")
    {'word': 'hello',
     'language': 'French',
     'translated': '"Bonjour"',
     'output': 'こんにちは (Konnichiwa)'}
    ```

    Using `+` builds a `ManualSequentialChainOfThought` behind the scenes.
    """

    prompt: str = Param(
        help=(
            "The prompt template string. This prompt template has Python-like variable"
            " placeholders, that then will be substituted with real values when this"
            " component is executed"
        )
    )
    llm: LLM = Node(LCAzureChatOpenAI, help="The LLM model to execute the input prompt")
    post_process: Function = Node(
        help=(
            "The function post-processor that post-processes LLM output prediction ."
            "It should take a string as input (this is the LLM output text) and return "
            "a dictionary, where the key should"
        )
    )

    @Node.auto(depends_on="prompt")
    def prompt_template(self):
        """Prompt component derived from the `prompt` param; internal plumbing"""
        return BasePromptComponent(template=self.prompt)

    def run(self, **kwargs) -> Document:
        """Fill the prompt, query the LLM and post-process its answer"""
        filled_prompt = self.prompt_template(**kwargs).text
        raw_answer = self.llm(filled_prompt).text
        return Document(self.post_process(raw_answer))

    def get_variables(self) -> List[str]:
        """Return an empty list (no variables are reported)"""
        return []

    def __add__(self, next_thought: "Thought") -> "ManualSequentialChainOfThought":
        """Chain this thought with `next_thought`, sharing this thought's LLM"""
        chained = [self, next_thought]
        return ManualSequentialChainOfThought(thoughts=chained, llm=self.llm)


class ManualSequentialChainOfThought(BaseComponent):
    """Run a sequence of manually defined thoughts one after another

    Any number of steps is supported; each step is a `Thought` (see that class
    for the details of a single step). This class only covers how thoughts are
    chained together.

    _**Usage:**_

    **Create and run a chain of thought without "+" operator:**

    ```pycon
    >>> from kotaemon.llms.cot import Thought, ManualSequentialChainOfThought
    >>> llm = LCAzureChatOpenAI(...)
    >>> thought1 = Thought(
    >>>    prompt="Word {word} in {language} is ",
    >>>    post_process=lambda string: {"translated": string},
    >>> )
    >>> thought2 = Thought(
    >>>     prompt="Translate {translated} to Japanese",
    >>>     post_process=lambda string: {"output": string},
    >>> )
    >>> thought = ManualSequentialChainOfThought(thoughts=[thought1, thought2], llm=llm)
    >>> thought(word="hello", language="French")
    {'word': 'hello',
     'language': 'French',
     'translated': '"Bonjour"',
     'output': 'こんにちは (Konnichiwa)'}
    ```

    **Create and run a chain of thought with "+" operator:** see the examples in
    the `Thought` class.

    An optional termination callback can be supplied. It is invoked after every
    thought with the dictionary of all values gathered so far and returns True
    or False; a True result stops the chain. When unset, the default callback
    always returns False.
    """

    thoughts: List[Thought] = Param(
        default_callback=lambda *_: [], help="List of Thought"
    )
    llm: LLM = Param(help="The LLM model to use (base of kotaemon.llms.BaseLLM)")
    terminate: Callable = Param(
        default=lambda _: False,
        help="Callback on terminate condition. Default to always return False",
    )

    def run(self, **kwargs) -> Document:
        """Execute every thought in order, feeding outputs into later steps"""

        # Work on a copy so the caller's arguments are never mutated.
        accumulated = deepcopy(kwargs)
        for position, step in enumerate(self.thoughts):
            # A chain-level LLM, when set, overrides the one on each thought.
            if self.llm:
                step.llm = self.llm
            self._prepare_child(step, f"thought{position}")

            step_result = step(**accumulated)
            accumulated.update(step_result.content)
            if self.terminate(accumulated):
                break

        return Document(accumulated)

    def __add__(self, next_thought: Thought) -> "ManualSequentialChainOfThought":
        """Return a new chain with `next_thought` appended to the existing steps"""
        extended = self.thoughts + [next_thought]
        return ManualSequentialChainOfThought(thoughts=extended, llm=self.llm)


================================================
FILE: libs/kotaemon/kotaemon/llms/linear.py
================================================
from typing import Any, Callable, Optional, Union

from ..base import BaseComponent
from ..base.schema import Document, IO_Type
from .chats import ChatLLM
from .completions import LLM
from .prompts import BasePromptComponent


class SimpleLinearPipeline(BaseComponent):
    """
    A simple pipeline for running a function with a prompt, a language model, and an
        optional post-processor.

    Attributes:
        prompt (BasePromptComponent): The prompt component used to generate the initial
            input.
        llm (Union[ChatLLM, LLM]): The language model component used to generate the
            output.
        post_processor (Union[BaseComponent, Callable[[IO_Type], IO_Type]]): An optional
            post-processor component or function.

    Example Usage:
        ```python
        from kotaemon.llms import LCAzureChatOpenAI, BasePromptComponent

        def identity(x):
            return x

        llm = LCAzureChatOpenAI(
            openai_api_base="your openai api base",
            openai_api_key="your openai api key",
            openai_api_version="your openai api version",
            deployment_name="dummy-q2-gpt35",
            temperature=0,
            request_timeout=600,
        )

        pipeline = SimpleLinearPipeline(
            prompt=BasePromptComponent(template="what is {word} in Japanese ?"),
            llm=llm,
            post_processor=identity,
        )
        print(pipeline(word="lone"))
        ```
    """

    prompt: BasePromptComponent
    llm: Union[ChatLLM, LLM]
    post_processor: Union[BaseComponent, Callable[[IO_Type], IO_Type]]

    def run(
        self,
        *,
        llm_kwargs: Optional[dict] = None,
        post_processor_kwargs: Optional[dict] = None,
        **prompt_kwargs,
    ):
        """
        Run the function with the given arguments and return the final output as a
            Document object.

        Args:
            llm_kwargs (dict): Keyword arguments for the llm call. `None` (the
                default) is treated as no extra arguments.
            post_processor_kwargs (dict): Keyword arguments for the post_processor.
                `None` (the default) is treated as no extra arguments.
            **prompt_kwargs: Keyword arguments for populating the prompt.

        Returns:
            Document: The final output of the function as a Document object.
        """
        # Normalize here instead of using `{}` as a default: a dict default is
        # shared between calls, and an explicit `None` (allowed by the
        # `Optional` annotation) cannot be unpacked with `**`.
        llm_kwargs = llm_kwargs or {}
        post_processor_kwargs = post_processor_kwargs or {}

        prompt = self.prompt(**prompt_kwargs)
        llm_output = self.llm(prompt.text, **llm_kwargs)
        if self.post_processor is not None:
            # Only the first item of the post-processor's result is kept.
            final_output = self.post_processor(llm_output, **post_processor_kwargs)[0]
        else:
            final_output = llm_output

        return Document(final_output)


class GatedLinearPipeline(SimpleLinearPipeline):
    """
    A pipeline that extends the SimpleLinearPipeline class and adds a condition
        attribute.

    Attributes:
        condition (Callable[[IO_Type], Any]): A callable function that represents the
            condition.

    Usage:
        ```{.py3 title="Example Usage"}
        from kotaemon.llms import LCAzureChatOpenAI, BasePromptComponent
        from kotaemon.parsers import RegexExtractor

        def identity(x):
            return x

        llm = LCAzureChatOpenAI(
            openai_api_base="your openai api base",
            openai_api_key="your openai api key",
            openai_api_version="your openai api version",
            deployment_name="dummy-q2-gpt35",
            temperature=0,
            request_timeout=600,
        )

        pipeline = GatedLinearPipeline(
            prompt=BasePromptComponent(template="what is {word} in Japanese ?"),
            condition=RegexExtractor(pattern="some pattern"),
            llm=llm,
            post_processor=identity,
        )
        print(pipeline(condition_text="some pattern", word="lone"))
        print(pipeline(condition_text="other pattern", word="lone"))
        ```
    """

    condition: Callable[[IO_Type], Any]

    def run(
        self,
        *,
        condition_text: Optional[str] = None,
        llm_kwargs: Optional[dict] = None,
        post_processor_kwargs: Optional[dict] = None,
        **prompt_kwargs,
    ) -> Document:
        """
        Run the pipeline with the given arguments and return the final output as a
            Document object.

        Args:
            condition_text (str): The condition text to evaluate. Default to None.
            llm_kwargs (dict): Additional keyword arguments for the language model
                call. `None` (the default) is treated as no extra arguments.
            post_processor_kwargs (dict): Additional keyword arguments for the
                post-processor. `None` (the default) is treated as no extra
                arguments.
            **prompt_kwargs: Keyword arguments for populating the prompt.

        Returns:
            Document: The final output of the pipeline as a Document object. When
                the condition is not met, a `Document(None)` is returned and the
                language model is not called.

        Raises:
            ValueError: If condition_text is None
        """
        if condition_text is None:
            raise ValueError("`condition_text` must be provided")

        # The gate is decided by the first item of the condition's result.
        if self.condition(condition_text)[0]:
            # Always forward real dicts so the parent never has to unpack
            # `None`, and no dict object is shared between calls.
            return super().run(
                llm_kwargs=llm_kwargs or {},
                post_processor_kwargs=post_processor_kwargs or {},
                **prompt_kwargs,
            )

        return Document(None)


================================================
FILE: libs/kotaemon/kotaemon/llms/prompts/__init__.py
================================================
from .base import BasePromptComponent
from .template import PromptTemplate

__all__ = ["BasePromptComponent", "PromptTemplate"]


================================================
FILE: libs/kotaemon/kotaemon/llms/prompts/base.py
================================================
from typing import Callable

from theflow import Param

from kotaemon.base import BaseComponent, Document

from .template import PromptTemplate


class BasePromptComponent(BaseComponent):
    """
    Base class for prompt components.

    Wraps a prompt template, stores the values for its placeholders as instance
    attributes, and renders the populated prompt as a `Document` when run.

    Args:
        template (str | PromptTemplate): The prompt template. A plain string is
            wrapped into a `PromptTemplate` on access (see `template__`).
        **kwargs: Any additional keyword arguments that will be used to populate the
            given template.
    """

    class Config:
        middleware_switches = {"theflow.middleware.CachingMiddleware": False}
        allow_extra = True

    template: str | PromptTemplate

    @Param.auto(depends_on="template")
    def template__(self):
        # Normalized view of `template`: always a PromptTemplate instance.
        return (
            self.template
            if isinstance(self.template, PromptTemplate)
            else PromptTemplate(self.template)
        )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The same kwargs are also validated and stored as placeholder values.
        self.__set(**kwargs)

    def __check_redundant_kwargs(self, **kwargs):
        """
        Check for redundant keyword arguments.

        Delegates to `check_redundant_kwargs` of the wrapped template.

        Parameters:
            **kwargs (dict): A dictionary of keyword arguments.

        Returns:
            None
        """
        self.template__.check_redundant_kwargs(**kwargs)

    def __check_unset_placeholders(self):
        """
        Check if all the placeholders in the template are set.

        The instance's `__dict__` is passed to `check_missing_kwargs` of the
        wrapped template, so a placeholder counts as set when an attribute of the
        same name is stored on the instance.

        Parameters:
            None

        Returns:
            None
        """
        self.template__.check_missing_kwargs(**self.__dict__)

    def __validate_value_type(self, **kwargs):
        """
        Validates the value types of the given keyword arguments.

        Keys starting with "template" are skipped. Every other value must be a
        `str`, `int`, `Document` or a callable.

        Parameters:
            **kwargs (dict): A dictionary of keyword arguments to be validated.

        Raises:
            ValueError: If any of the values in the kwargs dictionary have an
                unsupported type.

        Returns:
            None
        """
        type_error = []
        for k, v in kwargs.items():
            if k.startswith("template"):
                continue
            if not isinstance(v, (str, int, Document, Callable)):  # type: ignore
                type_error.append((k, type(v)))

        # All offending (key, type) pairs are reported in a single error.
        if type_error:
            raise ValueError(
                "Type of values must be either int, str, Document, Callable, "
                f"found unsupported type for (key, type): {type_error}"
            )

    def __set(self, **kwargs):
        """
        Set the values of the attributes in the object based on the provided keyword
            arguments.

        The kwargs are first checked for redundant keys and unsupported value
        types, then written directly into the instance's `__dict__`.

        Args:
            kwargs (dict): A dictionary with the attribute names as keys and the new
                values as values.

        Raises:
            ValueError: If any value has an unsupported type.

        Returns:
            None
        """
        self.__check_redundant_kwargs(**kwargs)
        self.__validate_value_type(**kwargs)

        self.__dict__.update(kwargs)

    def __prepare_value(self):
        """
        Generate a dictionary of keyword arguments based on the template's placeholders
            and the current instance's attributes.

        Callables are invoked (with no arguments) and their result is used as the
        value. Lists are rendered as the string form of a list of prepared items;
        `str` values are used as-is and `int`/`Document` values are converted
        with `str()`.

        Raises:
            ValueError: If a value (or a list item) has an unsupported type.

        Returns:
            dict: A dictionary of keyword arguments.
        """

        def __prepare(key, value):
            # Convert a single supported value into its string form.
            if isinstance(value, str):
                return value
            if isinstance(value, (int, Document)):
                return str(value)

            raise ValueError(
                f"Unsupported type {type(value)} for template value of key {key}"
            )

        kwargs = {}
        for k in self.template__.placeholders:
            v = getattr(self, k)

            # if get a callable, execute to get its output
            if isinstance(v, Callable):  # type: ignore[arg-type]
                v = v()

            if isinstance(v, list):
                v = str([__prepare(k, each) for each in v])
            elif isinstance(v, (str, int, Document)):
                v = __prepare(k, v)
            else:
                raise ValueError(
                    f"Unsupported type {type(v)} for template value of key `{k}`"
                )
            kwargs[k] = v

        return kwargs

    def set_value(self, **kwargs):
        """
        Similar to `__set` but for external use.

        Set the values of the attributes in the object based on the provided keyword
            arguments.

        Args:
            kwargs (dict): A dictionary with the attribute names as keys and the new
                values as values.

        Raises:
            ValueError: If any value has an unsupported type.

        Returns:
            None
        """
        self.__set(**kwargs)

    def run(self, **kwargs):
        """
        Populate the template and return the resulting prompt.

        The given kwargs are stored on the instance first (same as `set_value`),
        then every placeholder of the template must have a value.

        Args:
            **kwargs: Values for the template's placeholders.

        Raises:
            ValueError: If a value has an unsupported type or a placeholder has
                no value.

        Returns:
            Document: The populated prompt text, with metadata
                `{"origin": "PromptComponent"}`.
        """
        self.__set(**kwargs)
        self.__check_unset_placeholders()
        prepared_kwargs = self.__prepare_value()

        text = self.template__.populate(**prepared_kwargs)
        return Document(text=text, metadata={"origin": "PromptComponent"})

    def flow(self):
        # Invokes the component with no arguments, i.e. using only the values
        # already stored on the instance.
        return self.__call__()


================================================
FILE: libs/kotaemon/kotaemon/llms/prompts/template.py
================================================
import warnings
from string import Formatter


class PromptTemplate:
    """
    A `str.format`-style prompt template.

    The template is parsed once with `string.Formatter`. Field names that are
    valid Python identifiers become the template's placeholders.

    Args:
        template: The template text, using `{name}` placeholders (optionally with
            `!conversion` and `:format_spec`).
        ignore_invalid: What to do with a field whose name is not a valid Python
            identifier (e.g. `{0}` or `{}`). If True (default), a `UserWarning` is
            issued and the field is not treated as a placeholder: it is never
            required and is written back unchanged when populating. If False, a
            `ValueError` is raised.

    Attributes:
        template (str): The original template text.
        placeholders (set[str]): Names of the placeholders found in the template.
    """

    def __init__(self, template: str, ignore_invalid=True):
        formatter = Formatter()
        parsed_template = list(formatter.parse(template))

        placeholders = set()
        for _, key, _, _ in parsed_template:
            if key is None:
                # Trailing literal text without a field.
                continue
            if not key.isidentifier():
                if not ignore_invalid:
                    raise ValueError(
                        "Placeholder name must be a valid Python identifier, found:"
                        f" {key}."
                    )
                warnings.warn(f"Ignore invalid placeholder: {key}.", UserWarning)
                # Really ignore it: registering it would make it a required key
                # that cannot be passed as a normal keyword argument.
                continue
            placeholders.add(key)

        self.template = template
        self.placeholders = placeholders
        self.__formatter = formatter
        self.__parsed_template = parsed_template

    @staticmethod
    def _render_placeholder(field_name, format_spec, conversion) -> str:
        """Rebuild the `{name!conversion:spec}` source text of an unfilled field."""
        rendered = field_name
        if conversion:
            rendered += f"!{conversion}"
        if format_spec:
            rendered += f":{format_spec}"
        return "{" + rendered + "}"

    def check_missing_kwargs(self, **kwargs):
        """
        Check that every placeholder of the template has a value.

        Args:
            **kwargs: The values intended for the template; only the keys matter.

        Raises:
            ValueError: If at least one placeholder is absent from `kwargs`. The
                message lists the missing names in sorted order.

        Returns:
            None
        """
        missing_keys = self.placeholders.difference(kwargs.keys())
        if missing_keys:
            raise ValueError(
                f"Missing keys in template: {','.join(sorted(missing_keys))}"
            )

    def check_redundant_kwargs(self, **kwargs):
        """
        Warn about provided keys that are not placeholders of the template.

        This never raises; redundant keys only trigger a `UserWarning` listing
        them in sorted order.

        Args:
            **kwargs: The values intended for the template; only the keys matter.

        Returns:
            None
        """
        redundant_keys = set(kwargs.keys()) - self.placeholders

        if redundant_keys:
            warnings.warn(
                "Keys provided but not in template: "
                f"{','.join(sorted(redundant_keys))}",
                UserWarning,
            )

    def populate(self, safe=True, **kwargs) -> str:
        """
        Populate the template with the given keyword arguments.

        Args:
            safe: If True (default), require a value for every placeholder. If
                False, behave like `partial_populate`.
            **kwargs: The keyword arguments to populate the template.
                      Each keyword corresponds to a placeholder in the template.

        Returns:
            The populated template.

        Raises:
            ValueError: If `safe` is True and a placeholder has no value.
        """
        if safe:
            self.check_missing_kwargs(**kwargs)

        return self.partial_populate(**kwargs)

    def partial_populate(self, **kwargs):
        """
        Partially populate the template with the given keyword arguments.

        Fields without a value are written back in their original
        `{name!conversion:spec}` form, so the result can be used as a template
        again and populated later.

        Args:
            **kwargs: The keyword arguments to populate the template.
                      Each keyword corresponds to a placeholder in the template.

        Returns:
            str: The populated template.
        """
        self.check_redundant_kwargs(**kwargs)

        prompt = []
        for literal_text, field_name, format_spec, conversion in self.__parsed_template:
            prompt.append(literal_text)

            if field_name is None:
                continue

            if field_name not in kwargs:
                prompt.append(
                    self._render_placeholder(field_name, format_spec, conversion)
                )
                continue

            value = kwargs[field_name]
            if conversion is not None:
                value = self.__formatter.convert_field(value, conversion)
            if format_spec is not None:
                # Also turns non-string values into text (empty spec == str()).
                value = self.__formatter.format_field(value, format_spec)
            prompt.append(value)

        return "".join(prompt)

    def __add__(self, other):
        """
        Create a new PromptTemplate object by concatenating the template of the current
            object with the template of another PromptTemplate object.

        The two template texts are joined with a newline.

        Parameters:
            other (PromptTemplate): Another PromptTemplate object.

        Returns:
            PromptTemplate: A new PromptTemplate object with the concatenated templates.
        """
        return PromptTemplate(self.template + "\n" + other.template)


================================================
FILE: libs/kotaemon/kotaemon/loaders/__init__.py
================================================
from .adobe_loader import AdobeReader
from .azureai_document_intelligence_loader import AzureAIDocumentIntelligenceLoader
from .base import AutoReader, BaseReader
from .composite_loader import DirectoryReader
from .docling_loader import DoclingReader
from .docx_loader import DocxReader
from .excel_loader import ExcelReader, PandasExcelReader
from .html_loader import HtmlReader, MhtmlReader
from .mathpix_loader import MathpixPDFReader
from .ocr_loader import ImageReader, OCRReader
from .pdf_loader import PDFThumbnailReader
from .txt_loader import TxtReader
from .unstructured_loader import UnstructuredReader
from .web_loader import WebReader

__all__ = [
    "AutoReader",
    "AzureAIDocumentIntelligenceLoader",
    "BaseReader",
    "PandasExcelReader",
    "ExcelReader",
    "MathpixPDFReader",
    "ImageReader",
    "OCRReader",
    "DirectoryReader",
    "UnstructuredReader",
    "DocxReader",
    "HtmlReader",
    "MhtmlReader",
    "AdobeReader",
    "TxtReader",
    "PDFThumbnailReader",
    "WebReader",
    "DoclingReader",
]


================================================
FILE: libs/kotaemon/kotaemon/loaders/adobe_loader.py
================================================
import logging
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional

from decouple import config
from llama_index.core.readers.base import BaseReader

from kotaemon.base import Document

logger = logging.getLogger(__name__)

# Default chat-completions URL of the "gpt-4-vision" deployment, assembled once
# at import time from the AZURE_OPENAI_ENDPOINT and OPENAI_API_VERSION settings
# (each falls back to an empty string when unset).
DEFAULT_VLM_ENDPOINT = (
    f"{config('AZURE_OPENAI_ENDPOINT', default='')}"
    "openai/deployments/gpt-4-vision/chat/completions"
    f"?api-version={config('OPENAI_API_VERSION', default='')}"
)


class AdobeReader(BaseReader):
    """Read PDF using the Adobe's PDF Services.
    Be able to extract text, table, and figure with high accuracy

    Example:
        ```python
        >> from kotaemon.loaders import AdobeReader
        >> reader = AdobeReader()
        >> documents = reader.load_data("path/to/pdf")
        ```
    Args:
        vlm_endpoint: URL to the Vision Language Model endpoint. If not provided,
        will use the default `kotaemon.loaders.adobe_loader.DEFAULT_VLM_ENDPOINT`

        max_figures_to_caption: an int decides how many figured will be captioned.
        The rest will be ignored (are indexed without captions).
    """

    def __init__(
        self,
        vlm_endpoint: Optional[str] = None,
        max_figures_to_caption: int = 100,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Init params"""
        # NOTE: `**kwargs` is accepted but only positional args are forwarded.
        super().__init__(*args)
        # Element paths ending in /Table or /Table[n] (resp. /Figure, /Figure[n]).
        self.table_regex = r"/Table(\[\d+\])?$"
        self.figure_regex = r"/Figure(\[\d+\])?$"
        self.vlm_endpoint = vlm_endpoint or DEFAULT_VLM_ENDPOINT
        self.max_figures_to_caption = max_figures_to_caption

    def load_data(
        self, file: Path, extra_info: Optional[Dict] = None, **kwargs
    ) -> List[Document]:
        """Load data by calling to the Adobe's API

        Args:
            file (Path): Path to the PDF file (a plain string path is accepted too)
            extra_info (Optional[Dict]): Extra metadata merged into the metadata
                of every returned document

        Returns:
            List[Document]: list of documents extracted from the PDF file,
                includes 3 types: text, table, and image. An empty list is
                returned when the service produced no `structuredData.json`.

        """
        from .utils.adobe import (
            generate_figure_captions,
            load_json,
            parse_figure_paths,
            parse_table_paths,
            request_adobe_service,
        )

        # Coerce once so that string paths work for `.name` as well.
        file = Path(file)
        filename = file.name
        filepath = str(file.resolve())
        output_path = request_adobe_service(file_path=str(file), output_path="")
        results_path = os.path.join(output_path, "structuredData.json")

        if not os.path.exists(results_path):
            # Not inside an exception handler, so there is no traceback to log.
            logger.error("Fail to parse the document.")
            return []

        data = load_json(results_path)

        texts = defaultdict(list)
        tables = []
        figures = []

        elements = data["elements"]
        for item_id, item in enumerate(elements):
            # "Page" is shifted by one; a missing page becomes 0.
            page_number = item.get("Page", -1) + 1
            item_path = item["Path"]
            item_text = item.get("Text", "")

            file_paths = [
                Path(output_path) / path for path in item.get("filePaths", [])
            ]
            # The text of the preceding element is used as the title. The first
            # element has no predecessor; index -1 would wrap to the last one.
            title = elements[item_id - 1].get("Text", "") if item_id > 0 else ""

            if re.search(self.table_regex, item_path):
                table_content = parse_table_paths(file_paths)
                if not table_content:
                    continue
                table_caption = (
                    table_content.replace("|", "").replace("---", "")
                    + f"\n(Table in Page {page_number}. {title})"
                )
                tables.append((page_number, table_content, table_caption))

            elif re.search(self.figure_regex, item_path):
                figure_caption = (
                    item_text + f"\n(Figure in Page {page_number}. {title})"
                )
                figure_content = parse_figure_paths(file_paths)
                if not figure_content:
                    continue
                # A list (not a tuple): the caption is extended in place below.
                figures.append([page_number, figure_content, figure_caption])

            else:
                # Plain text; skip text nested inside a table or figure path.
                if item_text and "Table" not in item_path and "Figure" not in item_path:
                    texts[page_number].append(item_text)

        # get figure caption using GPT-4V
        figure_captions = generate_figure_captions(
            self.vlm_endpoint,
            [item[1] for item in figures],
            self.max_figures_to_caption,
        )
        for item, caption in zip(figures, figure_captions):
            # update figure caption
            item[2] += " " + caption

        # Wrap elements with Document
        documents = []
        extra_metadata = extra_info if extra_info else {}

        # join plain text elements
        for page_number, txts in texts.items():
            documents.append(
                Document(
                    text="\n".join(txts),
                    metadata={
                        "page_label": page_number,
                        "file_name": filename,
                        "file_path": filepath,
                        **extra_metadata,
                    },
                )
            )

        # table elements
        for page_number, table_content, table_caption in tables:
            documents.append(
                Document(
                    text=table_content,
                    metadata={
                        "table_origin": table_content,
                        "type": "table",
                        "page_label": page_number,
                        "file_name": filename,
                        "file_path": filepath,
                        **extra_metadata,
                    },
                    metadata_template="",
                    metadata_seperator="",
                )
            )

        # figure elements
        for page_number, figure_content, figure_caption in figures:
            documents.append(
                Document(
                    text=figure_caption,
                    metadata={
                        "image_origin": figure_content,
                        "type": "image",
                        "page_label": page_number,
                        "file_name": filename,
                        "file_path": filepath,
                        **extra_metadata,
                    },
                    metadata_template="",
                    metadata_seperator="",
                )
            )
        return documents


================================================
FILE: libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
================================================
import base64
import os
from io import BytesIO
from pathlib import Path
from typing import Optional

from PIL import Image

from kotaemon.base import Document, Param

from .base import BaseReader
from .utils.adobe import generate_single_figure_caption


def crop_image(file_path: Path, bbox: list[float], page_number: int = 0) -> Image.Image:
    """Crop the image based on the bounding box

    PDF pages are rendered at 150 dpi before cropping; for TIFF files the frame
    `page_number` is selected; any other file is opened as a single image.

    Args:
        file_path (Path): path to the image file
        bbox (list[float]): bounding box of the image (in percentage [x0, y0, x1, y1])
        page_number (int, optional): page number of the image. Defaults to 0.

    Returns:
        Image.Image: cropped image

    Raises:
        ImportError: if the file is a PDF and PyMuPDF is not installed
    """
    left, upper, right, lower = bbox

    # Accept corners given in either order.
    left, right = min(left, right), max(left, right)
    upper, lower = min(upper, lower), max(upper, lower)

    def _crop(img: Image.Image) -> Image.Image:
        # Scale the fractional box to pixel coordinates of this image.
        return img.crop(
            (
                int(left * img.width),
                int(upper * img.height),
                int(right * img.width),
                int(lower * img.height),
            )
        )

    suffix = file_path.suffix.lower()
    if suffix == ".pdf":
        try:
            import fitz
        except ImportError as exc:
            raise ImportError("Please install PyMuPDF: 'pip install PyMuPDF'") from exc

        # Close the PDF as soon as the page has been rasterized into `img`.
        with fitz.open(file_path) as doc:
            page = doc.load_page(page_number)
            pm = page.get_pixmap(dpi=150)
            img = Image.frombytes("RGB", (pm.width, pm.height), pm.samples)
        return _crop(img)

    # Crop while the file is open; the handle is released on exit.
    with Image.open(file_path) as img:
        if suffix in (".tif", ".tiff"):
            img.seek(page_number)
        return _crop(img)


class AzureAIDocumentIntelligenceLoader(BaseReader):
    """Utilize Azure AI Document Intelligence to parse document

    As of April 24, the supported file formats are: pdf, jpeg/jpg, png, bmp, tiff,
    heif, docx, xlsx, pptx and html.

    `load_data` returns one Document with the text content (the spans of the
    extracted figures and tables are cut out of it), followed by one Document
    per captioned figure and one Document per table.
    """

    _dependencies = ["azure-ai-documentintelligence", "PyMuPDF", "Pillow"]

    endpoint: str = Param(
        os.environ.get("AZUREAI_DOCUMENT_INTELLIGENT_ENDPOINT", None),
        help="Endpoint of Azure AI Document Intelligence",
    )
    credential: str = Param(
        os.environ.get("AZUREAI_DOCUMENT_INTELLIGENT_CREDENTIAL", None),
        help="Credential of Azure AI Document Intelligence",
    )
    model: str = Param(
        "prebuilt-layout",
        help=(
            "Model to use for document analysis. Default is prebuilt-layout. "
            "As of April 24, you can view the supported models [here]"
            "(https://learn.microsoft.com/en-us/azure/ai-services/"
            "document-intelligence/concept-model-overview?view=doc-intel-4.0.0"
            "#model-analysis-features)"
        ),
    )
    output_content_format: str = Param(
        "markdown",
        help="Output content format. Can be 'markdown' or 'text'.Default is markdown",
    )
    vlm_endpoint: str = Param(
        help=(
            "Default VLM endpoint for figure captioning. If not provided, will not "
            "caption the figures"
        )
    )
    figure_friendly_filetypes: list[str] = Param(
        [".pdf", ".jpeg", ".jpg", ".png", ".bmp", ".tiff", ".heif", ".tif"],
        help=(
            "File types that we can reliably open and extract figures. "
            "For files like .docx or .html, the visual layout may be different "
            "when viewed from different tools, hence we cannot use Azure DI "
            "location to extract figures."
        ),
    )
    cache_dir: str = Param(
        None,
        help="Directory to cache the downloaded files. Default is None",
    )

    @Param.auto(depends_on=["endpoint", "credential"])
    def client_(self):
        """Build the Document Intelligence client from endpoint and credential"""
        try:
            from azure.ai.documentintelligence import DocumentIntelligenceClient
            from azure.core.credentials import AzureKeyCredential
        except ImportError as exc:
            raise ImportError(
                "Please install azure-ai-documentintelligence"
            ) from exc

        return DocumentIntelligenceClient(
            self.endpoint, AzureKeyCredential(self.credential)
        )

    def run(
        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Component entry point, see `load_data`"""
        return self.load_data(Path(file_path), extra_info=extra_info, **kwargs)

    def load_data(
        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Extract the input file, allowing multi-modal extraction

        Args:
            file_path: path to the file to analyze
            extra_info: metadata merged into the metadata of every returned
                Document

        Returns:
            list[Document]: the text Document, then the figure Documents, then
                the table Documents
        """
        metadata = extra_info or {}
        # `.suffix` / `.stem` are used below, so accept plain strings as well
        file_path = Path(file_path)
        with open(file_path, "rb") as fi:
            poller = self.client_.begin_analyze_document(
                self.model,
                body=fi,
                content_type="application/octet-stream",
                output_content_format=self.output_content_format,
            )
            result = poller.result()

        # the total text content of the document in `output_content_format` format
        text_content = result.content
        # spans of `text_content` that are moved into figure / table Documents
        removed_spans: list[dict] = []

        # extract the figures
        figures = []
        for figure_desc in result.get("figures", []):
            if not self.vlm_endpoint:
                continue
            if file_path.suffix.lower() not in self.figure_friendly_filetypes:
                continue

            # a figure without location cannot be cropped; leave its text in place
            bounding_regions = figure_desc.get("boundingRegions") or []
            if not bounding_regions:
                continue

            # read & crop the image
            region = bounding_regions[0]
            page_number = region["pageNumber"]
            page = result.pages[page_number - 1]
            page_width = page["width"]
            page_height = page["height"]
            # polygon is a flat list [x0, y0, x1, y1, ...]
            polygon = region["polygon"]
            xs = polygon[0::2]
            ys = polygon[1::2]
            bbox = [
                min(xs) / page_width,
                min(ys) / page_height,
                max(xs) / page_width,
                max(ys) / page_height,
            ]
            img = crop_image(file_path, bbox, page_number - 1)

            # convert the image into base64
            img_bytes = BytesIO()
            img.save(img_bytes, format="PNG")
            img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
            img_base64 = f"data:image/png;base64,{img_base64}"

            # caption the image
            caption = generate_single_figure_caption(
                figure=img_base64, vlm_endpoint=self.vlm_endpoint
            )

            # store the image into document
            figure_metadata = {
                "image_origin": img_base64,
                "type": "image",
                "page_label": page_number,
            }
            figure_metadata.update(metadata)

            figures.append(
                Document(
                    text=caption,
                    metadata=figure_metadata,
                )
            )
            removed_spans += figure_desc["spans"]

        # extract the tables
        tables = []
        for table_desc in result.get("tables", []):
            if not table_desc["spans"]:
                continue

            # tables without location are attributed to the first page
            bounding_regions = table_desc["boundingRegions"]
            if bounding_regions:
                page_number = bounding_regions[0]["pageNumber"]
            else:
                page_number = 1

            # store the tables into document; the table text is taken from the
            # first span of the table in the full content
            offset = table_desc["spans"][0]["offset"]
            length = table_desc["spans"][0]["length"]
            table_text = text_content[offset : offset + length]
            table_metadata = {
                "type": "table",
                "page_label": page_number,
                "table_origin": table_text,
            }
            table_metadata.update(metadata)

            tables.append(
                Document(
                    text=table_text,
                    metadata=table_metadata,
                )
            )
            removed_spans += table_desc["spans"]

        # save the (complete) text content into markdown format
        if self.cache_dir is not None:
            cache_dir = Path(self.cache_dir)
            # do not fail after the analysis is done just because the cache
            # directory has not been created yet
            cache_dir.mkdir(parents=True, exist_ok=True)
            with open(cache_dir / f"{file_path.stem}.md", "w", encoding="utf-8") as f:
                f.write(text_content)

        # cut the removed spans out of the text in a single left-to-right pass.
        # `cursor` is the end of the last removed region, so overlapping spans
        # are only removed once.
        kept_parts: list[str] = []
        cursor = 0
        for span in sorted(removed_spans, key=lambda x: x["offset"]):
            start = span["offset"]
            if start > cursor:
                kept_parts.append(text_content[cursor:start])
            cursor = max(cursor, start + span["length"])
        kept_parts.append(text_content[cursor:])
        text_content = "".join(kept_parts)

        return [Document(content=text_content, metadata=metadata)] + figures + tables


================================================
FILE: libs/kotaemon/kotaemon/loaders/base.py
================================================
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Type, Union

from kotaemon.base import BaseComponent, Document

if TYPE_CHECKING:
    from llama_index.core.readers.base import BaseReader as LIBaseReader


class BaseReader(BaseComponent):
    """Common parent class of all readers (adds nothing to BaseComponent)"""


class AutoReader(BaseReader):
    """Generic reader delegating to a llama-hub / llama-index reader"""

    def __init__(self, reader_type: Union[str, Type["LIBaseReader"]]) -> None:
        """Create the wrapped reader from a class or from its name

        A class is instantiated as is. A name is first looked up in
        ``llama_index.readers.file``; only when that module or that attribute
        is not available, the deprecated ``download_loader`` is used (it may
        try to pip-install, which does not work in venvs without pip such as
        the ones created by uv).
        """
        import importlib

        if not isinstance(reader_type, str):
            self._reader = reader_type()
        else:
            try:
                file_readers = importlib.import_module("llama_index.readers.file")
                self._reader = getattr(file_readers, reader_type)()
            except (ImportError, AttributeError):
                from llama_index.core import download_loader

                loader_cls = download_loader(reader_type)
                self._reader = loader_cls()
        super().__init__()

    def load_data(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:
        """Read `file` with the wrapped reader, return kotaemon Documents"""
        # the wrapped reader returns its own Document class, so each one is
        # rebuilt as a kotaemon Document through its dict form
        return [
            Document.from_dict(raw_doc.to_dict())
            for raw_doc in self._reader.load_data(file=file, **kwargs)
        ]

    def run(self, file: Union[Path, str], **kwargs: Any) -> List[Document]:
        """Component entry point, same as `load_data`"""
        return self.load_data(file=file, **kwargs)


class LIReaderMixin(BaseComponent):
    """Base wrapper around llama-index reader

    To use the LIReaderMixin, you need to implement the _get_wrapped_class method
    to return the relevant llama-index reader class that you want to wrap.

    The wrapped reader is created in `__init__` with the given arguments. Public
    attributes (names not starting with "_") are read from and written to the
    wrapped reader.

    Example:

        ```python
        class DirectoryReader(LIReaderMixin, BaseReader):
            def _get_wrapped_class(self) -> Type["LIBaseReader"]:
                from llama_index.core import SimpleDirectoryReader

                return SimpleDirectoryReader
        ```
    """

    def _get_wrapped_class(self) -> Type["LIBaseReader"]:
        """Return the llama-index reader class to wrap (must be overridden)"""
        raise NotImplementedError(
            "Please return the relevant llama-index class in in _get_wrapped_class"
        )

    def __init__(self, *args, **kwargs):
        """Instantiate the wrapped reader, forwarding all arguments to it"""
        self._reader_class = self._get_wrapped_class()
        self._reader = self._reader_class(*args, **kwargs)
        super().__init__()

    def __setattr__(self, name: str, value: Any) -> None:
        # private names live on the wrapper, everything else on the wrapped reader
        if name.startswith("_"):
            return super().__setattr__(name, value)

        return setattr(self._reader, name, value)

    def __getattr__(self, name: str) -> Any:
        # __getattr__ is only called when the normal lookup fails. If `_reader`
        # itself is missing (instance not initialized yet, e.g. while being
        # copied or unpickled), `self._reader` below would call this method
        # again and recurse until RecursionError.
        if name == "_reader":
            raise AttributeError(name)
        return getattr(self._reader, name)

    def load_data(self, *args, **kwargs: Any) -> List[Document]:
        """Read with the wrapped reader, return kotaemon Documents"""
        documents = self._reader.load_data(*args, **kwargs)

        # convert Document to new base class from kotaemon
        converted_documents = [Document.from_dict(doc.to_dict()) for doc in documents]
        return converted_documents

    def run(self, *args, **kwargs: Any) -> List[Document]:
        """Component entry point, same as `load_data`"""
        return self.load_data(*args, **kwargs)


================================================
FILE: libs/kotaemon/kotaemon/loaders/composite_loader.py
================================================
from typing import Callable, List, Optional, Type

from llama_index.core.readers.base import BaseReader as LIBaseReader

from .base import BaseReader, LIReaderMixin


class DirectoryReader(LIReaderMixin, BaseReader):
    """kotaemon wrapper of llama-index SimpleDirectoryReader

    Args:
        input_dir (str): directory to read from.
        input_files (List): explicit list of file paths to read
            (Optional; takes precedence over input_dir and exclude)
        exclude (List): glob patterns of file paths to leave out (Optional)
        exclude_hidden (bool): skip hidden files (dotfiles) when True.
        encoding (str): encoding used to read the files.
            utf-8 by default.
        errors (str): policy for encoding and decoding errors,
              see https://docs.python.org/3/library/functions.html#open
        recursive (bool): also search the subdirectories when True.
            False by default.
        filename_as_id (bool): use the filename as the document id when True.
            False by default.
        required_exts (Optional[List[str]]): only read files with these
            extensions. None by default.
        file_extractor (Optional[Dict[str, BaseReader]]): maps a file extension
            to the BaseReader that converts such a file to text. When not
            given, the default from DEFAULT_FILE_READER_CLS is used.
        num_files_limit (Optional[int]): upper bound on the number of files
            to read. None by default.
        file_metadata (Optional[Callable[[str], Dict]]): function mapping a
            filename to the Dict of metadata of its Document.
            None by default.
    """

    input_dir: Optional[str] = None
    input_files: Optional[List] = None
    exclude: Optional[List] = None
    exclude_hidden: bool = True
    errors: str = "ignore"
    recursive: bool = False
    encoding: str = "utf-8"
    filename_as_id: bool = False
    required_exts: Optional[list[str]] = None
    file_extractor: Optional[dict[str, "LIBaseReader"]] = None
    num_files_limit: Optional[int] = None
    file_metadata: Optional[Callable[[str], dict]] = None

    def _get_wrapped_class(self) -> Type["LIBaseReader"]:
        """Tell the mixin to wrap llama-index SimpleDirectoryReader"""
        # imported here so that llama_index.core is only loaded on instantiation
        from llama_index.core import SimpleDirectoryReader as wrapped_reader_cls

        return wrapped_reader_cls


================================================
FILE: libs/kotaemon/kotaemon/loaders/docling_loader.py
================================================
import base64
from collections import defaultdict
from io import BytesIO
from pathlib import Path
from typing import List, Optional

from kotaemon.base import Document, Param

from .azureai_document_intelligence_loader import crop_image
from .base import BaseReader
from .utils.adobe import generate_single_figure_caption, make_markdown_table


class DoclingReader(BaseReader):
    """Using Docling to extract document structure and content

    `load_data` returns one Document per page of plain text, then one Document
    per table (markdown) and one Document per figure (caption text, with the
    cropped image stored as base64 in the metadata).
    """

    _dependencies = ["docling"]

    vlm_endpoint: str = Param(
        help=(
            "Default VLM endpoint for figure captioning. "
            "If not provided, will not caption the figures"
        )
    )

    max_figure_to_caption: int = Param(
        100,
        help=(
            "The maximum number of figures to caption. "
            "The rest will be indexed without captions."
        ),
    )

    figure_friendly_filetypes: list[str] = Param(
        [".pdf", ".jpeg", ".jpg", ".png", ".bmp", ".tiff", ".heif", ".tif"],
        help=(
            "File types that we can reliably open and extract figures. "
            "For files like .docx or .html, the visual layout may be different "
            "when viewed from different tools, hence we cannot use Azure DI location "
            "to extract figures."
        ),
    )

    @Param.auto(cache=True)
    def converter_(self):
        """Create the docling DocumentConverter"""
        try:
            from docling.document_converter import DocumentConverter
        except ImportError as exc:
            raise ImportError(
                "Please install docling: 'pip install docling'"
            ) from exc

        return DocumentConverter()

    def run(
        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
    ) -> List[Document]:
        """Component entry point, see `load_data`"""
        return self.load_data(file_path, extra_info, **kwargs)

    def load_data(
        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
    ) -> List[Document]:
        """Extract the input file, allowing multi-modal extraction

        Args:
            file_path: path to the file to convert
            extra_info: metadata merged into the metadata of every returned
                Document

        Returns:
            List[Document]: text Documents, then table Documents, then figure
                Documents
        """

        metadata = extra_info or {}

        result = self.converter_.convert(file_path)
        result_dict = result.document.export_to_dict()

        file_path = Path(file_path)
        file_name = file_path.name

        # extract the figures
        figures = []
        gen_caption_count = 0
        for figure_obj in result_dict.get("pictures", []):
            if not self.vlm_endpoint:
                continue
            if file_path.suffix.lower() not in self.figure_friendly_filetypes:
                continue

            # retrieve extractive captions provided by docling
            extractive_captions = self._get_extractive_captions(
                figure_obj, result_dict
            )

            # a figure without provenance has no page / bbox to crop from
            prov = figure_obj.get("prov") or []
            if not prov:
                continue

            # read & crop image
            page_number = prov[0]["page_no"]

            try:
                page_size = result_dict["pages"][str(page_number)]["size"]
                page_width = page_size["width"]
                page_height = page_size["height"]

                bbox_obj = prov[0]["bbox"]
                bbox: list[float] = [
                    bbox_obj["l"],
                    bbox_obj["t"],
                    bbox_obj["r"],
                    bbox_obj["b"],
                ]
                # crop_image expects fractions of the page size, measured from
                # the top-left corner
                if bbox_obj["coord_origin"] == "BOTTOMLEFT":
                    bbox = self._convert_bbox_bl_tl(bbox, page_width, page_height)
                else:
                    # already measured from the top-left: only normalize
                    bbox = [
                        bbox[0] / page_width,
                        bbox[1] / page_height,
                        bbox[2] / page_width,
                        bbox[3] / page_height,
                    ]

                img = crop_image(file_path, bbox, page_number - 1)
            except KeyError as e:
                print(e, list(result_dict["pages"].keys()))
                continue

            # convert img to base64
            img_bytes = BytesIO()
            img.save(img_bytes, format="PNG")
            img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
            img_base64 = f"data:image/png;base64,{img_base64}"

            # generate the generative caption, up to `max_figure_to_caption` times
            if gen_caption_count >= self.max_figure_to_caption:
                gen_caption = ""
            else:
                gen_caption_count += 1
                gen_caption = generate_single_figure_caption(
                    figure=img_base64, vlm_endpoint=self.vlm_endpoint
                )

            # join the extractive and generative captions
            caption = "\n".join(extractive_captions + [gen_caption])

            # store the image into document
            figure_metadata = {
                "image_origin": img_base64,
                "type": "image",
                "page_label": page_number,
                "file_name": file_name,
                "file_path": file_path,
            }
            figure_metadata.update(metadata)

            figures.append(
                Document(
                    text=caption,
                    metadata=figure_metadata,
                )
            )

        # extract the tables
        tables = []
        for table_obj in result_dict.get("tables", []):
            # convert the tables into markdown format, preceded by the captions
            markdown_table = self._parse_table(table_obj)
            caption = "\n".join(
                self._get_extractive_captions(table_obj, result_dict)
            )
            markdown_table = f"{caption}\n{markdown_table}"

            table_metadata = {
                "type": "table",
                "page_label": self._get_page_number(table_obj),
                "table_origin": markdown_table,
                "file_name": file_name,
                "file_path": file_path,
            }
            table_metadata.update(metadata)

            tables.append(
                Document(
                    text=markdown_table,
                    metadata=table_metadata,
                )
            )

        # join plain text elements, grouped by page
        page_number_to_text = defaultdict(list)
        for text_obj in result_dict["texts"]:
            page_number = self._get_page_number(text_obj)
            page_number_to_text[page_number].append(text_obj["text"])

        texts = [
            Document(
                text="\n".join(txts),
                metadata={
                    "page_label": page_number,
                    "file_name": file_name,
                    "file_path": file_path,
                    **metadata,
                },
            )
            for page_number, txts in page_number_to_text.items()
        ]

        return texts + tables + figures

    def _get_extractive_captions(self, obj: dict, result_dict: dict) -> List[str]:
        """Resolve the caption references of a figure / table into their texts"""
        captions: List[str] = []
        for caption in obj.get("captions", []):
            # the last path segment of the reference is the index into "texts"
            text_id = caption["$ref"].split("/")[-1]
            try:
                captions.append(result_dict["texts"][int(text_id)]["text"])
            except (ValueError, TypeError, IndexError) as e:
                print(e)
                continue
        return captions

    def _get_page_number(self, obj: dict) -> int:
        """Page number of the first provenance entry, 1 when it is not known"""
        # the provenance list can be empty, then there is no entry to read from
        prov = obj.get("prov") or []
        if not prov:
            return 1
        return prov[0].get("page_no", 1)

    def _convert_bbox_bl_tl(
        self, bbox: list[float], page_width: int, page_height: int
    ) -> list[float]:
        """Convert bbox from bottom-left to top-left

        The returned coordinates are also divided by the page size.
        """
        x0, y0, x1, y1 = bbox
        return [
            x0 / page_width,
            (page_height - y1) / page_height,
            x1 / page_width,
            (page_height - y0) / page_height,
        ]

    def _parse_table(self, table_obj: dict) -> str:
        """Convert docling table object to markdown table"""
        table_as_list: List[List[str]] = [
            [cell["text"] for cell in row] for row in table_obj["data"]["grid"]
        ]

        return make_markdown_table(table_as_list)


================================================
FILE: libs/kotaemon/kotaemon/loaders/docx_loader.py
================================================
import unicodedata
from pathlib import Path
from typing import List, Optional

import pandas as pd
from llama_index.core.readers.base import BaseReader

from kotaemon.base import Document


class DocxReader(BaseReader):
    """Read Docx files that respect table, using python-docx library

    Reader behavior:
        - All paragraphs are extracted as a Document
        - Each table is extracted as a Document, rendered as a CSV string
        - The output is a list of Documents, concatenating the above
        (tables + paragraphs)
    """

    def __init__(self, *args, **kwargs):
        """Check that python-docx is available"""
        try:
            import docx  # noqa
        except ImportError as exc:
            raise ImportError(
                "docx is not installed. "
                "Please install it using `pip install python-docx`"
            ) from exc
        super().__init__()

    def _load_single_table(self, table) -> List[List[str]]:
        """Extract content from tables. Return a list of columns: list[str]
        Some merged cells will share duplicated content.
        """
        n_row = len(table.rows)
        n_col = len(table.columns)

        # arrays[j][i] is the text of the cell at row i, column j
        arrays = [["" for _ in range(n_row)] for _ in range(n_col)]

        for i, row in enumerate(table.rows):
            for j, cell in enumerate(row.cells):
                arrays[j][i] = cell.text

        return arrays

    def load_data(
        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
    ) -> List[Document]:
        """Load data using Docx reader

        Args:
            file_path (Path): Path to .docx file
            extra_info (dict, optional): metadata merged into the metadata of
                every returned Document

        Returns:
            List[Document]: list of documents extracted from the Docx file
                (one per table, then one for all the paragraphs)
        """
        import docx

        file_path = Path(file_path).resolve()

        doc = docx.Document(str(file_path))
        all_text = "\n".join(
            unicodedata.normalize("NFKC", p.text) for p in doc.paragraphs
        )
        pages = [all_text]  # 1 page only

        tables = []
        for t in doc.tables:
            # return list of columns: list of string
            arrays = self._load_single_table(t)
            if arrays and not arrays[0]:
                # the table has columns but no row, hence no header to read
                continue

            # The first cell of each column is its header. The frame is built
            # with positional keys and renamed afterwards: using the header
            # texts as dict keys would drop the columns whose header is
            # repeated (e.g. merged header cells).
            frame = pd.DataFrame({idx: col[1:] for idx, col in enumerate(arrays)})
            frame.columns = [col[0] for col in arrays]
            tables.append(frame)

        extra_info = extra_info or {}

        # create output Document with metadata from table
        documents = []
        for table in tables:
            table_csv = table.to_csv(index=False)
            documents.append(
                Document(
                    text=table_csv.strip(),
                    metadata={
                        "table_origin": table_csv,
                        "type": "table",
                        **extra_info,
                    },
                    metadata_template="",
                    metadata_seperator="",
                )
            )

        # create Document from non-table text
        documents.extend(
            Document(
                text=non_table_text.strip(),
                metadata={"page_label": 1, **extra_info},
            )
            for non_table_text in pages
        )

        return documents


================================================
FILE: libs/kotaemon/kotaemon/loaders/excel_loader.py
================================================
"""Pandas Excel reader.

Pandas parser for .xlsx files.

"""
from pathlib import Path
from typing import Any, List, Optional, Union

from llama_index.core.readers.base import BaseReader

from kotaemon.base import Document


class PandasExcelReader(BaseReader):
    r"""Pandas-based Excel parser.

    Reads the worksheets with the Pandas `read_excel` function and renders all
    of them into a single Document.
    If special parameters are required, use the `pandas_config` dict.

    Args:

        pandas_config (dict): Options for the `pandas.read_excel` function call.
            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
            for more information. Set to empty dict by default,
            this means defaults will be used.
        row_joiner (str): separator put between the rows. Defaults to "\n",
            which is also used when an empty value is given.
        col_joiner (str): separator put between the cells of a row. Defaults to
            " ", which is also used when an empty value is given.

    """

    def __init__(
        self,
        *args: Any,
        pandas_config: Optional[dict] = None,
        row_joiner: str = "\n",
        col_joiner: str = " ",
        **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._pandas_config = pandas_config or {}
        self._row_joiner = row_joiner if row_joiner else "\n"
        self._col_joiner = col_joiner if col_joiner else " "

    def load_data(
        self,
        file: Path,
        include_sheetname: bool = False,
        sheet_name: Optional[Union[str, int, list]] = None,
        extra_info: Optional[dict] = None,
        **kwargs,
    ) -> List[Document]:
        """Parse the Excel file into one Document holding every selected sheet.

        Args:
            file (Path): The path to the Excel file to read.
            include_sheetname (bool): Whether to include the sheet name in the output.
            sheet_name (Union[str, int, list, None]): The specific sheet(s) to
                read from, default is None which reads all sheets.
            extra_info (dict, optional): metadata of the returned Document.

        Returns:
            List[Document]: A list with a single `Document` whose text is the
                rows of the selected sheets, joined with the configured
                row / column joiners.
        """
        try:
            import pandas as pd
        except ImportError as exc:
            raise ImportError(
                "install pandas using `pip3 install pandas` to use this loader"
            ) from exc

        # always pass a list (or None) so that read_excel returns a dict of sheets
        if sheet_name is not None:
            sheet_name = (
                [sheet_name] if not isinstance(sheet_name, list) else sheet_name
            )

        dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config)

        # rows of all the sheets, in sheet order
        text_list: List[List[str]] = []
        for key, frame in dfs.items():
            if include_sheetname:
                # the key is an int when the sheet is selected by position,
                # while the joiners below only accept strings
                text_list.append([str(key)])
            # drop the rows that are completely empty
            frame = frame.dropna(axis=0, how="all")
            # cast to object first so that "" can be stored in numeric columns
            # without relying on implicit upcasting
            frame = frame.astype("object")
            frame.fillna("", inplace=True)
            text_list.extend(frame.values.astype(str).tolist())

        output = [
            Document(
                text=self._row_joiner.join(
                    self._col_joiner.join(sublist) for sublist in text_list
                ),
                metadata=extra_info or {},
            )
        ]

        return output


class ExcelReader(BaseReader):
    r"""Spreadsheet exporter respecting multiple worksheets

    Reads the worksheets with the Pandas `read_excel` function and renders each
    of them into its own Document.
    If special parameters are required, use the `pandas_config` dict.

    Args:

        pandas_config (dict): Options for the `pandas.read_excel` function call.
            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
            for more information. Set to empty dict by default,
            this means defaults will be used.
        row_joiner (str): separator put between the rows. Defaults to "\n",
            which is also used when an empty value is given.
        col_joiner (str): separator put between the cells of a row. Defaults to
            " ", which is also used when an empty value is given.

    """

    def __init__(
        self,
        *args: Any,
        pandas_config: Optional[dict] = None,
        row_joiner: str = "\n",
        col_joiner: str = " ",
        **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._pandas_config = pandas_config or {}
        self._row_joiner = row_joiner if row_joiner else "\n"
        self._col_joiner = col_joiner if col_joiner else " "

    def load_data(
        self,
        file: Path,
        include_sheetname: bool = True,
        sheet_name: Optional[Union[str, int, list]] = None,
        extra_info: Optional[dict] = None,
        **kwargs,
    ) -> List[Document]:
        """Parse the Excel file into one Document per selected sheet.

        Args:
            file (Path): The path to the Excel file to read.
            include_sheetname (bool): Whether to include the sheet name in the output.
            sheet_name (Union[str, int, list, None]): The specific sheet(s) to
                read from, default is None which reads all sheets.
            extra_info (dict, optional): metadata merged into the metadata of
                every returned Document.

        Returns:
            List[Document]: A list of `Document` objects, one per sheet. The
                metadata holds the 1-based position of the sheet
                (`page_label`) and its key (`sheet_name`).
        """

        try:
            import pandas as pd
        except ImportError as exc:
            raise ImportError(
                "install pandas using `pip3 install pandas` to use this loader"
            ) from exc

        # always pass a list (or None) so that read_excel returns a dict of sheets
        if sheet_name is not None:
            sheet_name = (
                [sheet_name] if not isinstance(sheet_name, list) else sheet_name
            )

        # clean up input
        file = Path(file)
        extra_info = extra_info or {}

        dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config)
        output = []

        for idx, (key, frame) in enumerate(dfs.items()):
            # drop the rows that are completely empty
            frame = frame.dropna(axis=0, how="all")
            # cast to object so that "" can be stored in numeric columns
            frame = frame.astype("object")
            frame.fillna("", inplace=True)

            rows = frame.values.astype(str).tolist()
            content = self._row_joiner.join(
                self._col_joiner.join(row).strip() for row in rows
            ).strip()
            if include_sheetname:
                content = f"(Sheet {key} of file {file.name})\n{content}"
            metadata = {"page_label": idx + 1, "sheet_name": key, **extra_info}
            output.append(Document(text=content, metadata=metadata))

        return output


================================================
FILE: libs/kotaemon/kotaemon/loaders/html_loader.py
================================================
import email
from pathlib import Path
from typing import Optional

from llama_index.core.readers.base import BaseReader
from theflow.settings import settings as flowsettings

from kotaemon.base import Document


class HtmlReader(BaseReader):
    """Read HTML files using html2text.

    Reader behavior:
        - HTML is read with html2text.
        - The converted text is split on `page_break_pattern`, which is used
          as a literal separator (`str.split`), not as a regular expression.
        - Each page is extracted as a Document
        - The output is a list of Documents

    Args:
        page_break_pattern (str): Separator used to split the converted text
            into pages. When not set, the whole file is a single page.
    """

    def __init__(self, page_break_pattern: Optional[str] = None, *args, **kwargs):
        # Fail at construction time when the optional dependency is missing.
        try:
            import html2text  # noqa
        except ImportError:
            raise ImportError(
                "html2text is not installed. "
                "Please install it using `pip install html2text`"
            )

        self._page_break_pattern: Optional[str] = page_break_pattern
        super().__init__()

    def load_data(
        self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Load data using Html reader

        Args:
            file_path: path to HTML file
            extra_info: extra information passed to this reader during extracting data

        Returns:
            list[Document]: list of documents extracted from the HTML file,
                one per page, with a 1-based `page_label` in the metadata
        """
        import html2text

        file_path = Path(file_path).resolve()

        with file_path.open("r") as f:
            # Drop only the line terminator of each line. Slicing off the last
            # character unconditionally (`line[:-1]`) deletes a real character
            # when the final line of the file has no trailing newline.
            html_text = "".join(line.rstrip("\n") for line in f)

        # read HTML
        all_text = html2text.html2text(html_text)
        pages = (
            all_text.split(self._page_break_pattern)
            if self._page_break_pattern
            else [all_text]
        )

        extra_info = extra_info or {}

        # create one Document per page
        documents = [
            Document(
                text=page.strip(),
                metadata={"page_label": page_id + 1, **extra_info},
            )
            for page_id, page in enumerate(pages)
        ]

        return documents


class MhtmlReader(BaseReader):
    """Parse `MHTML` files with `BeautifulSoup`."""

    def __init__(
        self,
        cache_dir: Optional[str] = getattr(
            flowsettings, "KH_MARKDOWN_OUTPUT_DIR", None
        ),
        open_encoding: Optional[str] = None,
        bs_kwargs: Optional[dict] = None,
        get_text_separator: str = "",
    ) -> None:
        """Initialize with an optional cache directory, the file encoding to use,
        and any kwargs to pass to the BeautifulSoup object.

        Args:
            cache_dir: Directory where the extracted text is saved as a
                `<file stem>.md` file. Nothing is saved when it is None.
            open_encoding: The encoding to use when opening the file.
            bs_kwargs: Any kwargs to pass to the BeautifulSoup object.
                Defaults to `{"features": "lxml"}`.
            get_text_separator: The separator to use when getting the text
                from the soup.
        """
        try:
            import bs4  # noqa:F401
        except ImportError:
            raise ImportError(
                "beautifulsoup4 package not found, please install it with "
                "`pip install beautifulsoup4`"
            )

        self.cache_dir = cache_dir
        self.open_encoding = open_encoding
        if bs_kwargs is None:
            bs_kwargs = {"features": "lxml"}
        self.bs_kwargs = bs_kwargs
        self.get_text_separator = get_text_separator

    def load_data(
        self, file_path: Path | str, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Load MHTML document into document objects.

        Args:
            file_path: path to the MHTML file
            extra_info: extra metadata merged into the returned Document

        Returns:
            list[Document]: a single Document whose text joins the text of
                every `text/html` part of the message with blank lines
        """

        from bs4 import BeautifulSoup

        extra_info = extra_info or {}
        # Used as-is when the file contains no text/html part.
        metadata: dict = extra_info
        page = []
        file_name = Path(file_path)
        with open(file_path, "r", encoding=self.open_encoding) as f:
            message = email.message_from_string(f.read())

        parts = message.get_payload()
        # A non-multipart message has a plain payload: treat it as its own part.
        if not isinstance(parts, list):
            parts = [message]

        for part in parts:
            if part.get_content_type() != "text/html":
                continue

            html = part.get_payload(decode=True).decode()

            soup = BeautifulSoup(html, **self.bs_kwargs)
            text = soup.get_text(self.get_text_separator)

            title = str(soup.title.string) if soup.title else ""

            metadata = {
                "source": str(file_path),
                "title": title,
                **extra_info,
            }
            # Drop blank lines and separate the remaining ones by a blank line.
            lines = [line for line in text.split("\n") if line.strip()]
            text = "\n\n".join(lines)
            if text:
                page.append(text)

        # save the page into markdown format
        # `page` is empty when the file has no non-empty text/html part; there
        # is nothing to cache then, and indexing page[0] would raise IndexError.
        if self.cache_dir is not None and page:
            with open(Path(self.cache_dir) / f"{file_name.stem}.md", "w") as f:
                f.write(page[0])

        return [Document(text="\n\n".join(page), metadata=metadata)]


================================================
FILE: libs/kotaemon/kotaemon/loaders/mathpix_loader.py
================================================
import json
import re
import time
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Union

import requests
from langchain.utils import get_from_dict_or_env
from llama_index.core.readers.base import BaseReader

from kotaemon.base import Document

from .utils.table import strip_special_chars_markdown


# MathpixPDFLoader implementation taken largely from Daniel Gross's:
# https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21
class MathpixPDFReader(BaseReader):
    """Load `PDF` files using `Mathpix` service."""

    def __init__(
        self,
        processed_file_format: str = "md",
        max_wait_time_seconds: int = 900,
        should_clean_pdf: bool = True,
        **kwargs: Any,
    ) -> None:
        """Configure the reader and resolve the Mathpix credentials.

        Args:
            processed_file_format: format requested from Mathpix for the
                processed file. Default is "md".
            max_wait_time_seconds: maximum time to wait for the response from
                the server. Default is 900.
            should_clean_pdf: whether `load_data` / `lazy_load_data` pass the
                retrieved content through `clean_pdf`. Default is True.
            **kwargs: may carry `mathpix_api_key` and `mathpix_api_id`. They
                are resolved with `get_from_dict_or_env` against the
                `MATHPIX_API_KEY` / `MATHPIX_API_ID` environment variables,
                with "empty" as the default.
        """
        self.processed_file_format = processed_file_format
        self.max_wait_time_seconds = max_wait_time_seconds
        self.should_clean_pdf = should_clean_pdf

        self.mathpix_api_id = get_from_dict_or_env(
            kwargs, "mathpix_api_id", "MATHPIX_API_ID", default="empty"
        )
        self.mathpix_api_key = get_from_dict_or_env(
            kwargs, "mathpix_api_key", "MATHPIX_API_KEY", default="empty"
        )
        super().__init__()

    @property
    def _mathpix_headers(self) -> Dict[str, str]:
        return {"app_id": self.mathpix_api_id, "app_key": self.mathpix_api_key}

    @property
    def url(self) -> str:
        return "https://api.mathpix.com/v3/pdf"

    @property
    def data(self) -> dict:
        options = {
            "conversion_formats": {self.processed_file_format: True},
            "enable_tables_fallback": True,
        }
        return {"options_json": json.dumps(options)}

    def send_pdf(self, file_path) -> str:
        """Upload a PDF to Mathpix and return the id assigned to it.

        Args:
            file_path: path of the PDF file to upload.

        Returns:
            The `pdf_id` found in the JSON response.

        Raises:
            ValueError: if the response contains no `pdf_id`. The message
                carries the response payload, like the error raised by
                `wait_for_processing`, so the failure can be diagnosed.
        """
        with open(file_path, "rb") as f:
            response = requests.post(
                self.url,
                headers=self._mathpix_headers,
                files={"file": f},
                data=self.data,
            )
        response_data = response.json()
        if "pdf_id" not in response_data:
            raise ValueError(f"Unable to send PDF to Mathpix: {response_data}")
        return response_data["pdf_id"]

    def wait_for_processing(self, pdf_id: str) -> None:
        """Wait for processing to complete.

        Args:
            pdf_id: a PDF id.

        Returns: None
        """
        url = self.url + "/" + pdf_id
        for _ in range(0, self.max_wait_time_seconds, 5):
            response = requests.get(url, headers=self._mathpix_headers)
            response_data = response.json()
            status = response_data.get("status", None)
            print(
                f"Processing status: {status},"
                f"Progress: {response_data.get('percent_done', 0)}%"
            )

            if status == "completed":
                return
            elif status == "error":
                raise ValueError(f"Mathpix processing error: {response_data}")
            elif status in [
                "split",
                "processing",
            ]:  # Add handling for processing states
                time.sleep(5)
                continue
            else:
                print(f"Unknown status: {response_data}")
                time.sleep(5)

        raise TimeoutError(
            f"Processing did not complete within {self.max_wait_time_seconds} seconds"
        )

    def get_processed_pdf(self, pdf_id: str) -> str:
        """Wait for `pdf_id` to be processed, then download the converted file.

        Args:
            pdf_id: a PDF id.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            ValueError: if the download does not answer with HTTP 200.
        """
        self.wait_for_processing(pdf_id)
        download_url = f"{self.url}/{pdf_id}.{self.processed_file_format}"
        response = requests.get(download_url, headers=self._mathpix_headers)
        if response.status_code == 200:
            content = response.content.decode("utf-8")
            print(f"Retrieved content length: {len(content)}")  # Debug print
            return content
        raise ValueError(f"Failed to get processed PDF: {response.text}")

    def clean_pdf(self, contents: str) -> str:
        """Strip Mathpix/LaTeX markup from the converted text.

        Args:
            contents: text of the converted file.

        Returns:
            The text without lines starting with `![]`, with `\\section{`
            turned into `# `, escapes and LaTeX wrappers removed, all braces
            removed, and empty-label links (`[](http...)`) deleted.
        """
        kept_lines = []
        for line in contents.split("\n"):
            if line.startswith("![]"):
                continue
            kept_lines.append(line)
        # replace \section{Title} with # Title (the closing brace is removed below)
        cleaned = "\n".join(kept_lines).replace("\\section{", "# ")

        # Applied strictly in this order: later entries rely on earlier ones.
        substitutions = (
            # the "\" slash that Mathpix adds to escape $, %, (, etc.
            (r"\$", "$"),
            (r"\%", "%"),
            (r"\(", "("),
            (r"\)", ")"),
            ("$\\begin{array}", ""),
            ("\\end{array}$", ""),
            ("\\\\", ""),
            ("\\text", ""),
            ("}", ""),
            ("{", ""),
            ("\\mathrm", ""),
        )
        for needle, replacement in substitutions:
            cleaned = cleaned.replace(needle, replacement)

        # http:// or https:// followed by anything but a closing paren
        link_pattern = r"\[]\(\s*({0})\s*\)".format("http[s]?://[^)]+")
        return re.sub(link_pattern, "", cleaned)

    def parse_markdown_text_to_tables(
        self, content: str
    ) -> tuple[list[tuple[int, str]], list[tuple[int, str]]]:
        """Parse markdown text to get tables and texts separately.

        Pages are delimited by lines of the form `# Page <n>`. Content that
        follows such a marker is labelled with `<n>`; content that precedes
        the first marker (or all content when there is no marker) is labelled
        page 1.

        Args:
            content: markdown text to parse.

        Returns:
            Tuple of (tables, texts) where each is a list of (page_num, content) tuples
        """
        print("Starting markdown parsing...")
        print(f"Content length: {len(content)}")

        # With a capturing group, re.split returns
        # [preamble, n1, body1, n2, body2, ...]. Each body therefore keeps the
        # number written in its own marker, instead of its position in the
        # split result, which is off by one whenever the content starts with
        # a marker (the empty preamble would consume index 1).
        parts = re.split(r"(?m)^# Page (\d+)\n", content)
        pages = [(1, parts[0])]
        pages.extend((int(parts[i]), parts[i + 1]) for i in range(1, len(parts), 2))

        # Consecutive lines that start and end with "|" form one table.
        table_pattern = re.compile(r"(\|[^\n]+\|(?:\n\|[^\n]+\|)*)")
        paragraph_break = re.compile(r"\n\s*\n")

        tables: list[tuple[int, str]] = []
        texts: list[tuple[int, str]] = []

        for page_num, page_content in pages:
            if not page_content.strip():
                continue

            # Extract tables from the page, then remove them from its text
            table_matches = table_pattern.findall(page_content)
            if table_matches:
                tables.extend((page_num, table.strip()) for table in table_matches)
                page_content = table_pattern.sub("", page_content)

            # Split remaining content into blank-line separated chunks
            for chunk in paragraph_break.split(page_content):
                if chunk.strip():
                    texts.append((page_num, chunk.strip()))

        print(f"Found {len(tables)} tables and {len(texts)} text sections")
        return tables, texts

    def load_data(
        self,
        file: Union[str, List[str], Path],
        extra_info: Optional[Dict] = None,
        **load_kwargs: Any,
    ) -> List[Document]:
        """Load a PDF through Mathpix and return its tables and text chunks.

        Args:
            file: path of the PDF file; a `str` is converted to `Path`.
            extra_info: extra metadata merged into every returned Document.
            **load_kwargs: when `response_content` is given it is used as the
                converted content and no request is sent to Mathpix.

        Returns:
            Table Documents first, then text Documents. When nothing was
            parsed but the content is not blank, a single text Document
            holding the whole content.
        """
        file_path = file
        if isinstance(file, str):
            file_path = Path(file)

        if "response_content" not in load_kwargs:
            content = self.get_processed_pdf(self.send_pdf(file_path))
        else:
            content = load_kwargs["response_content"]

        if self.should_clean_pdf:
            content = self.clean_pdf(content)

        def with_extra(metadata: dict) -> dict:
            # extra_info entries override the reader's own metadata keys
            if extra_info:
                metadata.update(extra_info)
            return metadata

        tables, texts = self.parse_markdown_text_to_tables(content)
        documents = []

        # Handle tables
        for page_num, table_content in tables:
            documents.append(
                Document(
                    text=strip_special_chars_markdown(table_content),
                    metadata=with_extra(
                        {
                            "table_origin": table_content,
                            "type": "table",
                            "page_label": page_num,
                            "page_number": page_num,
                        }
                    ),
                    metadata_template="",
                    metadata_seperator="",
                )
            )

        # Handle text sections
        for page_num, text_content in texts:
            if not text_content.strip():
                continue
            text_metadata = with_extra(
                {
                    "source": str(file_path),
                    "type": "text",
                    "page_label": page_num,
                    "page_number": page_num,
                }
            )
            documents.append(Document(text=text_content, metadata=text_metadata))

        # Fallback if no content was parsed
        if not documents and content.strip():
            fallback_metadata = with_extra(
                {
                    "source": str(file_path),
                    "type": "text",
                    "page_label": 1,
                    "page_number": 1,
                }
            )
            documents.append(
                Document(text=content.strip(), metadata=fallback_metadata)
            )

        return documents

    def lazy_load_data(
        self,
        file: Union[str, List[str], Path],
        extra_info: Optional[Dict] = None,
        **load_kwargs: Any,
    ) -> Generator[Document, None, None]:
        """Generator variant of `load_data`: yield the Documents one by one.

        Args:
            file: path of the PDF file; a `str` is converted to `Path`.
            extra_info: extra metadata merged into every yielded Document.
            **load_kwargs: when `response_content` is given it is used as the
                converted content and no request is sent to Mathpix.

        Yields:
            Table Documents first, then text Documents. When nothing was
            parsed but the content is not blank, a single text Document
            holding the whole content.
        """
        file_path = file
        if isinstance(file, str):
            file_path = Path(file)

        if "response_content" not in load_kwargs:
            pdf_id = self.send_pdf(file_path)
            print(f"PDF ID: {pdf_id}")
            content = self.get_processed_pdf(pdf_id)
        else:
            content = load_kwargs["response_content"]

        if self.should_clean_pdf:
            content = self.clean_pdf(content)

        def with_extra(metadata: dict) -> dict:
            # extra_info entries override the reader's own metadata keys
            if extra_info:
                metadata.update(extra_info)
            return metadata

        tables, texts = self.parse_markdown_text_to_tables(content)

        # Handle tables
        for page_num, table_content in tables:
            yield Document(
                text=strip_special_chars_markdown(table_content),
                metadata=with_extra(
                    {
                        "table_origin": table_content,
                        "type": "table",
                        "page_label": page_num,
                        "page_number": page_num,
                    }
                ),
                metadata_template="",
                metadata_seperator="",
            )

        # Handle text sections
        for page_num, text_content in texts:
            if not text_content.strip():
                continue
            text_metadata = with_extra(
                {
                    "source": str(file_path),
                    "type": "text",
                    "page_label": page_num,
                    "page_number": page_num,
                }
            )
            yield Document(text=text_content, metadata=text_metadata)

        # Fallback if no content was parsed
        if not tables and not texts and content.strip():
            fallback_metadata = with_extra(
                {
                    "source": str(file_path),
                    "type": "text",
                    "page_label": 1,
                    "page_number": 1,
                }
            )
            yield Document(text=content.strip(), metadata=fallback_metadata)

        print(f"Completed processing PDF: {file_path}")


================================================
FILE: libs/kotaemon/kotaemon/loaders/ocr_loader.py
================================================
import logging
import os
from pathlib import Path
from typing import List, Optional
from uuid import uuid4

import requests
from llama_index.core.readers.base import BaseReader
from tenacity import after_log, retry, stop_after_attempt, wait_exponential

from kotaemon.base import Document

from .utils.pdf_ocr import parse_ocr_output, read_pdf_unstructured
from .utils.table import strip_special_chars_markdown

logger = logging.getLogger(__name__)

DEFAULT_OCR_ENDPOINT = "http://127.0.0.1:8000/v2/ai/infer/"


@retry(
    stop=stop_after_attempt(6),
    wait=wait_exponential(multiplier=20, exp_base=2, min=1, max=1000),
    after=after_log(logger, logging.WARNING),
)
def tenacious_api_post(url, file_path, table_only, **kwargs):
    """POST the file at `file_path` to `url`, retrying on failure.

    The file is sent as the `input` upload together with a fresh `job_id`
    and the `table_only` flag; extra keyword arguments go to `requests.post`.
    A non-success HTTP status raises (via `raise_for_status`), which lets the
    `retry` decorator above attempt the call again, up to 6 attempts.

    Returns:
        The `requests` response of the successful call.
    """
    form_fields = {"job_id": uuid4(), "table_only": table_only}
    with file_path.open("rb") as stream:
        resp = requests.post(
            url=url, files={"input": stream}, data=form_fields, **kwargs
        )
        resp.raise_for_status()
    return resp


class OCRReader(BaseReader):
    """Read PDF using OCR, with high focus on table extraction

    Example:
        ```python
        >> from kotaemon.loaders import OCRReader
        >> reader = OCRReader()
        >> documents = reader.load_data("path/to/pdf")
        ```

    Args:
        endpoint: URL to FullOCR endpoint. If not provided, will look for
            environment variable `OCR_READER_ENDPOINT` or use the default
            `kotaemon.loaders.ocr_loader.DEFAULT_OCR_ENDPOINT`
            (http://127.0.0.1:8000/v2/ai/infer/)
        use_ocr: whether to use OCR to read text (e.g: from images, tables) in the PDF
            If False, only the table and text within table cells will be extracted.
    """

    def __init__(self, endpoint: Optional[str] = None, use_ocr=True):
        """Init the OCR reader with OCR endpoint (FullOCR pipeline)"""
        super().__init__()
        if not endpoint:
            # fall back to the environment, then to the module default
            endpoint = os.getenv("OCR_READER_ENDPOINT", DEFAULT_OCR_ENDPOINT)
        self.ocr_endpoint = endpoint
        self.use_ocr = use_ocr

    def load_data(
        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
    ) -> List[Document]:
        """Load data using OCR reader

        Args:
            file_path (Path): Path to PDF file
            extra_info (dict): extra metadata added to every returned Document
            response_content: keyword argument; when given, it is used as the
                OCR result and the endpoint is not called
            debug_path (Path): Path to store debug image output
            artifact_path (Path): Path to OCR endpoints artifacts directory

        Returns:
            List[Document]: list of documents extracted from the PDF file,
                table documents first, then non-table text documents
        """
        file_path = Path(file_path).resolve()

        if "response_content" not in kwargs:
            # call the API from FullOCR endpoint
            resp = tenacious_api_post(
                url=self.ocr_endpoint, file_path=file_path, table_only=not self.use_ocr
            )
            ocr_results = resp.json()["result"]
        else:
            # overriding response content if specified
            ocr_results = kwargs["response_content"]

        debug_path = kwargs.pop("debug_path", None)
        artifact_path = kwargs.pop("artifact_path", None)

        # read PDF through normal reader (unstructured) and merge its text
        # output with the OCR output
        tables, texts = parse_ocr_output(
            ocr_results,
            read_pdf_unstructured(file_path),
            debug_path=debug_path,
            artifact_path=artifact_path,
        )
        extra_info = extra_info or {}

        documents = []

        # table documents keep the original table text in their metadata
        for page_id, table_text in tables:
            documents.append(
                Document(
                    text=strip_special_chars_markdown(table_text),
                    metadata={
                        "table_origin": table_text,
                        "type": "table",
                        "page_label": page_id + 1,
                        **extra_info,
                    },
                    metadata_template="",
                    metadata_seperator="",
                )
            )

        # documents from non-table text
        for page_id, non_table_text in texts:
            documents.append(
                Document(
                    text=non_table_text,
                    metadata={"page_label": page_id + 1, **extra_info},
                )
            )

        return documents


class ImageReader(BaseReader):
    """Send a file to the FullOCR endpoint and wrap each OCR result in a Document

    Example:
        ```python
        >> from kotaemon.loaders.ocr_loader import ImageReader
        >> reader = ImageReader()
        >> documents = reader.load_data("path/to/file")
        ```

    Args:
        endpoint: URL to FullOCR endpoint. If not provided, will look for
            environment variable `OCR_READER_ENDPOINT` or use the default
            `kotaemon.loaders.ocr_loader.DEFAULT_OCR_ENDPOINT`
            (http://127.0.0.1:8000/v2/ai/infer/)
    """

    def __init__(self, endpoint: Optional[str] = None):
        """Init the reader with OCR endpoint (FullOCR pipeline)"""
        super().__init__()
        if not endpoint:
            # fall back to the environment, then to the module default
            endpoint = os.getenv("OCR_READER_ENDPOINT", DEFAULT_OCR_ENDPOINT)
        self.ocr_endpoint = endpoint

    def load_data(
        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
    ) -> List[Document]:
        """Load data using OCR reader

        Args:
            file_path (Path): Path to the file sent to the OCR endpoint
            extra_info (dict): metadata attached to every returned Document
            response_content: keyword argument; when given, it is used as the
                OCR result and the endpoint is not called

        Returns:
            List[Document]: one document per OCR result, whose content is the
                result's `csv_string`
        """
        file_path = Path(file_path).resolve()

        if "response_content" not in kwargs:
            # call the API from FullOCR endpoint (always with table_only=False)
            resp = tenacious_api_post(
                url=self.ocr_endpoint, file_path=file_path, table_only=False
            )
            ocr_results = resp.json()["result"]
        else:
            # overriding response content if specified
            ocr_results = kwargs["response_content"]

        extra_info = extra_info or {}
        return [
            Document(
                content=ocr_result["csv_string"],
                metadata=extra_info,
            )
            for ocr_result in ocr_results
        ]


================================================
FILE: libs/kotaemon/kotaemon/loaders/pdf_loader.py
================================================
import base64
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional

from decouple import config
from fsspec import AbstractFileSystem
from llama_index.readers.file import PDFReader
from PIL import Image

from kotaemon.base import Document

PDF_LOADER_DPI = config("PDF_LOADER_DPI", default=40, cast=int)


def get_page_thumbnails(
    file_path: Path, pages: list[int], dpi: int = PDF_LOADER_DPI
) -> List[str]:
    """Get image thumbnails of the pages in the PDF file.

    Args:
        file_path (Path): path to the PDF file
        pages (list[int]): page indices to render, passed to `doc.load_page`
        dpi (int): resolution used to rasterize each page

    Returns:
        list[str]: one base64 PNG data URI (as produced by
            `convert_image_to_base64`) per requested page

    Raises:
        ImportError: if PyMuPDF is not installed
    """
    suffix = file_path.suffix.lower()
    assert suffix == ".pdf", "This function only supports PDF files."
    try:
        import fitz
    except ImportError:
        raise ImportError("Please install PyMuPDF: 'pip install PyMuPDF'")

    doc = fitz.open(file_path)
    try:
        output_imgs = []
        for page_number in pages:
            page = doc.load_page(page_number)
            pm = page.get_pixmap(dpi=dpi)
            img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
            output_imgs.append(convert_image_to_base64(img))
    finally:
        # Release the file handle even when rendering a page fails.
        doc.close()

    return output_imgs


def convert_image_to_base64(img: Image.Image) -> str:
    """Encode `img` as a PNG and return it as a base64 data URI string."""
    with BytesIO() as buffer:
        img.save(buffer, format="PNG")
        encoded = base64.b64encode(buffer.getvalue())

    payload = encoded.decode("utf-8")
    return f"data:image/png;base64,{payload}"


class PDFThumbnailReader(PDFReader):
    """PDF parser with thumbnail for each page."""

    def __init__(self) -> None:
        """Initialize the parent PDFReader with `return_full_document=False`."""
        super().__init__(return_full_document=False)

    def load_data(
        self,
        file: Path,
        extra_info: Optional[Dict] = None,
        fs: Optional[AbstractFileSystem] = None,
    ) -> List[Document]:
        """Parse the file and append one thumbnail Document per page.

        Documents returned by the parent reader are kept only when their
        `page_label` converts to an int. Thumbnails are rendered for page
        indices `0..n-1`, where `n` is the number of documents that carry a
        `page_label`, and paired with those labels in order; a thumbnail is
        emitted only for labels that convert to an int.
        """
        parsed_docs = super().load_data(file, extra_info, fs)

        labels = []
        kept_docs = []
        label_is_int: dict[str, bool] = {}

        for doc in parsed_docs:
            if "page_label" not in doc.metadata:
                continue
            label = doc.metadata["page_label"]
            labels.append(label)
            try:
                int(label)
            except ValueError:
                label_is_int[label] = False
            else:
                label_is_int[label] = True
                kept_docs.append(doc)

        page_indices = list(range(len(labels)))

        print("Page numbers:", len(page_indices))
        thumbnails = get_page_thumbnails(file, page_indices)

        extra = {} if extra_info is None else extra_info
        for thumbnail, label in zip(thumbnails, labels):
            if not label_is_int[label]:
                continue
            kept_docs.append(
                Document(
                    text="Page thumbnail",
                    metadata={
                        "image_origin": thumbnail,
                        "type": "thumbnail",
                        "page_label": label,
                        **extra,
                    },
                )
            )

        return kept_docs


================================================
FILE: libs/kotaemon/kotaemon/loaders/txt_loader.py
================================================
from pathlib import Path
from typing import Optional

from kotaemon.base import Document

from .base import BaseReader


class TxtReader(BaseReader):
    """Reader that loads a UTF-8 text file into a single Document."""

    def run(
        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Convert `file_path` to a `Path` and delegate to `load_data`."""
        path = Path(file_path)
        return self.load_data(path, extra_info=extra_info, **kwargs)

    def load_data(
        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Read the whole file as UTF-8 text.

        Args:
            file_path: path to the text file
            extra_info: metadata of the returned Document (empty when not given)

        Returns:
            list[Document]: a single Document holding the file content
        """
        metadata = extra_info if extra_info else {}
        with open(file_path, "r", encoding="utf-8") as handle:
            content = handle.read()

        return [Document(text=content, metadata=metadata)]


================================================
FILE: libs/kotaemon/kotaemon/loaders/unstructured_loader.py
================================================
"""Unstructured file reader.

A parser for unstructured text files using Unstructured.io.
Supports .txt, .docx, .pptx, .jpg, .png, .eml, .html, and .pdf documents.

To use .doc and .xls parser, install

sudo apt-get install -y libmagic-dev poppler-utils libreoffice
pip install xlrd

"""
from pathlib import Path
from typing import Any, Dict, List, Optional

from llama_index.core.readers.base import BaseReader

from kotaemon.base import Document


class UnstructuredReader(BaseReader):
    """General unstructured text reader for a variety of files."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Init params.

        Keyword Args:
            url: server URL; giving it switches the reader to API mode.
                Defaults to "http://localhost:8000".
            api: explicit API-mode flag; when given it overrides the mode
                implied by `url`.
            api_key: key passed to the API. Defaults to "".
        """
        super().__init__(*args)  # not passing kwargs to parent bc it cannot accept it

        url_given = "url" in kwargs
        self.server_url = str(kwargs["url"]) if url_given else "http://localhost:8000"
        # local parsing is the default; a url switches to the API unless
        # `api` says otherwise
        self.api = kwargs.get("api", url_given)
        self.api_key = kwargs.get("api_key", "")

    def load_data(
        self,
        file: Path,
        extra_info: Optional[Dict] = None,
        split_documents: Optional[bool] = False,
        **kwargs,
    ) -> List[Document]:
        """Load data using Unstructured.io

        If the reader is in API mode the file is parsed through an API call,
        otherwise it is parsed locally.

        Args:
            file: path of the file to parse
            extra_info: extra metadata merged into every returned Document
            split_documents: if True, return one Document per parsed element,
                whose metadata is extended with the element's own metadata;
                otherwise return a single Document joining all elements

        Returns:
            list of documents
        """
        file_path_str = str(file)
        if self.api:
            from unstructured.partition.api import partition_via_api

            elements = partition_via_api(
                filename=file_path_str,
                api_key=self.api_key,
                api_url=self.server_url + "/general/v0/general",
            )
        else:
            # Parse file locally
            from unstructured.partition.auto import partition

            elements = partition(filename=file_path_str)

        # Process elements
        source = Path(file)
        file_name = source.name
        file_path = str(source.resolve())

        if not split_documents:
            # Create a single document by joining all the texts, with the
            # whitespace inside each element collapsed to single spaces
            merged_text = "\n\n".join(" ".join(str(el).split()) for el in elements)
            metadata = {"file_name": file_name, "file_path": file_path}
            if extra_info is not None:
                metadata.update(extra_info)
            return [Document(text=merged_text, metadata=metadata)]

        # Element metadata fields that are not copied:
        # - _known_field_names: internal bookkeeping field
        # - coordinates: does not serialize
        # - parent_id: might cause interference
        skipped_fields = ("_known_field_names", "coordinates", "parent_id")

        docs = []
        for node in elements:
            metadata = {"file_name": file_name, "file_path": file_path}
            if hasattr(node, "metadata"):
                # Load metadata fields
                metadata.update(
                    (field, val)
                    for field, val in vars(node.metadata).items()
                    if field not in skipped_fields
                )

            if extra_info is not None:
                metadata.update(extra_info)

            # the file name always wins over element metadata and extra_info
            metadata["file_name"] = file_name
            docs.append(Document(text=node.text, metadata=metadata))

        return docs


================================================
FILE: libs/kotaemon/kotaemon/loaders/utils/__init__.py
================================================


================================================
FILE: libs/kotaemon/kotaemon/loaders/utils/adobe.py
================================================
# need pip install pdfservices-sdk==2.3.0

import base64
import json
import logging
import os
import tempfile
import zipfile
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import List, Union

import pandas as pd
from decouple import config

from kotaemon.loaders.utils.gpt4v import generate_gpt4v


def request_adobe_service(file_path: str, output_path: str = "") -> str:
    """Run the Adobe PDF Extract operation on a PDF and unzip the result.

    The operation is configured to extract text and tables, and to render
    tables and figures. Credentials are read through `decouple.config` from
    `PDF_SERVICES_CLIENT_ID` and `PDF_SERVICES_CLIENT_SECRET` (both default to
    an empty string when unset).

    Args:
        file_path (str): path to the pdf file
        output_path (str): directory to store the results. When empty, a new
            temporary directory is created with `tempfile.mkdtemp()`.

    Returns:
        output_path (str): directory holding the downloaded zip and its
            extracted contents. It is returned even when the SDK call failed,
            in which case the error is only logged.

    Raises:
        ImportError: if the Adobe PDF Services SDK cannot be imported.
    """
    # The SDK is an optional dependency, so it is imported lazily here.
    try:
        from adobe.pdfservices.operation.auth.credentials import Credentials
        from adobe.pdfservices.operation.exception.exceptions import (
            SdkException,
            ServiceApiException,
            ServiceUsageException,
        )
        from adobe.pdfservices.operation.execution_context import ExecutionContext
        from adobe.pdfservices.operation.io.file_ref import FileRef
        from adobe.pdfservices.operation.pdfops.extract_pdf_operation import (
            ExtractPDFOperation,
        )
        from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_element_type import (  # noqa: E501
            ExtractElementType,
        )
        from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_pdf_options import (  # noqa: E501
            ExtractPDFOptions,
        )
        from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_renditions_element_type import (  # noqa: E501
            ExtractRenditionsElementType,
        )
    except ImportError:
        raise ImportError(
            "pdfservices-sdk is not installed. "
            "Please install it by running `pip install pdfservices-sdk"
            "@git+https://github.com/niallcm/pdfservices-python-sdk.git"
            "@bump-and-unfreeze-requirements`"
        )

    # Fall back to a fresh temporary directory when no destination is given.
    if not output_path:
        output_path = tempfile.mkdtemp()

    try:
        # Initial setup, create credentials instance.
        credentials = (
            Credentials.service_principal_credentials_builder()
            .with_client_id(config("PDF_SERVICES_CLIENT_ID", default=""))
            .with_client_secret(config("PDF_SERVICES_CLIENT_SECRET", default=""))
            .build()
        )

        # Create an ExecutionContext using credentials
        # and create a new operation instance.
        execution_context = ExecutionContext.create(credentials)
        extract_pdf_operation = ExtractPDFOperation.create_new()

        # Set operation input from a source file.
        source = FileRef.create_from_local_file(file_path)
        extract_pdf_operation.set_input(source)

        # Build ExtractPDF options and set them into the operation
        extract_pdf_options: ExtractPDFOptions = (
            ExtractPDFOptions.builder()
            .with_elements_to_extract(
                [ExtractElementType.TEXT, ExtractElementType.TABLES]
            )
            .with_elements_to_extract_renditions(
                [
                    ExtractRenditionsElementType.TABLES,
                    ExtractRenditionsElementType.FIGURES,
                ]
            )
            .build()
        )
        extract_pdf_operation.set_options(extract_pdf_options)

        # Execute the operation.
        result: FileRef = extract_pdf_operation.execute(execution_context)

        # Save the result to the specified location.
        zip_file_path = os.path.join(
            output_path, "ExtractTextTableWithFigureTableRendition.zip"
        )
        result.save_as(zip_file_path)
        # Open the ZIP file
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            # Extract all contents to the destination folder
            zip_ref.extractall(output_path)
    except (ServiceApiException, ServiceUsageException, SdkException):
        # Best effort: SDK failures are logged (with traceback) and swallowed,
        # so the caller still receives output_path.
        logging.exception("Exception encountered while executing operation")

    return output_path


def make_markdown_table(table_as_list: List[List[str]]) -> str:
    """Render a list-of-rows table as a markdown table string.

    The first row is used as the header; every following row becomes one
    body line. Cells are converted with `str()`.

    Args:
        table_as_list: list of table rows
            Example: [["Name", "Age", "Height"],
                    ["Jake", 20, 5'10],
                    ["Mary", 21, 5'7]]
    Returns:
        markdown representation of the table, wrapped in a leading newline
        and a trailing blank line
    """
    header_cells = table_as_list[0]

    header_line = "| " + "".join(" " + str(cell) + " |" for cell in header_cells)
    divider_line = "| " + "--- | " * len(header_cells)
    body_lines = [
        "| " + "".join(str(cell) + " | " for cell in row)
        for row in table_as_list[1:]
    ]

    all_lines = [header_line, divider_line, *body_lines]
    return "\n" + "\n".join(all_lines) + "\n\n"


def load_json(input_path: Union[str, Path]) -> dict:
    """Load a JSON file and return the parsed content.

    The file is always decoded as UTF-8 rather than with the platform's
    locale encoding, so non-ASCII content is read consistently across
    operating systems.

    Args:
        input_path: path to the JSON file

    Returns:
        the parsed JSON content
    """
    with open(input_path, "r", encoding="utf-8") as fi:
        return json.load(fi)


def load_excel(input_path: Union[str, Path]) -> str:
    """Load an excel file and convert its first sheet to a markdown table.

    Empty cells become empty strings, column names containing "Unnamed"
    (the placeholder pandas gives to header-less columns) are blanked, and
    carriage-return markers / newlines inside cells are flattened to spaces.

    Args:
        input_path: path to the excel file

    Returns:
        markdown representation of the sheet
    """
    df = pd.read_excel(input_path).fillna("")

    # Column labels are not guaranteed to be strings (e.g. numeric headers),
    # so convert before the substring test to avoid a TypeError.
    header = [
        "" if "Unnamed" in str(name) else name
        for name in df.columns.values.tolist()
    ]
    # Convert dataframe to a list of rows, header first
    row_list = [header] + df.values.tolist()

    for row in row_list:
        for item_id, item in enumerate(row):
            row[item_id] = str(item).replace("_x000D_", " ").replace("\n", " ").strip()

    return make_markdown_table(row_list)


def encode_image_base64(image_path: Union[str | Path]) -> Union[bytes, str]:
    """Convert image to base64"""

    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def parse_table_paths(file_paths: List[Path]) -> str:
    """Return the markdown table of the first `.xlsx` path in `file_paths`.

    An empty string is returned when no path has the `.xlsx` suffix.
    """
    excel_path = next((p for p in file_paths if p.suffix == ".xlsx"), None)
    if excel_path is None:
        return ""
    return load_excel(excel_path)


def parse_figure_paths(file_paths: List[Path]) -> Union[bytes, str]:
    """Return the first `.png` path in `file_paths` as a base64 data URL.

    An empty string is returned when no path has the `.png` suffix.
    """
    png_path = next((p for p in file_paths if p.suffix == ".png"), None)
    if png_path is None:
        return ""

    base64_image = encode_image_base64(png_path)
    return f"data:image/png;base64,{base64_image}"  # type: ignore


def generate_single_figure_caption(vlm_endpoint: str, figure: str) -> str:
    """Summarize a single figure using GPT-4V.

    Args:
        vlm_endpoint: endpoint of the vision language model service
        figure: the image passed to `generate_gpt4v` as `images`

    Returns:
        the generated caption, or an empty string when `figure` is empty,
        when the response contains "sorry" (treated as a refusal), or when
        the request raised before producing an output.
    """
    if not figure:
        return ""

    output = ""
    try:
        output = generate_gpt4v(
            endpoint=vlm_endpoint,
            prompt="Provide a short 2 sentence summary of this image?",
            images=figure,
        )
        # Responses containing "sorry" are discarded rather than used as captions.
        if "sorry" in output.lower():
            output = ""
    except Exception as e:
        # Best effort: a failed caption must not abort the whole document.
        print(f"Error generating caption: {e}")

    return output


def generate_figure_captions(
    vlm_endpoint: str, figures: List, max_figures_to_process: int
) -> List:
    """Summarize several figures using GPT-4V.

    The first `max_figures_to_process` figures are captioned concurrently in
    a thread pool; the remaining figures are not sent to the model.

    Args:
        vlm_endpoint (str): endpoint to the vision language model service
        figures (List): list of base64 images
        max_figures_to_process (int): the maximum number of figures will be summarized,
        the rest are ignored.

    Returns:
        results (List[str]): list of all figure captions and empty strings for
        ignored figures, in the same order as `figures`.
    """
    to_gen_figures = figures[:max_figures_to_process]
    other_figures = figures[max_figures_to_process:]

    with ThreadPoolExecutor() as executor:
        # Pass the figure as an argument instead of closing over the loop
        # variable: a lambda would look `figure` up only when the worker runs,
        # by which time the comprehension may already have advanced.
        futures = [
            executor.submit(generate_single_figure_caption, vlm_endpoint, figure)
            for figure in to_gen_figures
        ]

    # Leaving the `with` block waits for all submitted tasks to finish.
    results = [future.result() for future in futures]
    return results + [""] * len(other_figures)


================================================
FILE: libs/kotaemon/kotaemon/loaders/utils/box.py
================================================
from typing import List, Tuple


def bbox_to_points(box: List[int]):
    """Expand an [x1, y1, x2, y2] box into its four corner points.

    The corners are listed starting from (x1, y1) and continue through
    (x2, y1), (x2, y2) and (x1, y2).
    """
    left, top, right, bottom = box
    top_left = (left, top)
    top_right = (right, top)
    bottom_right = (right, bottom)
    bottom_left = (left, bottom)
    return [top_left, top_right, bottom_right, bottom_left]


def points_to_bbox(points: List[Tuple[int, int]]):
    """Return the [min_x, min_y, max_x, max_y] list enclosing all points."""
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    return [x_min, y_min, x_max, y_max]


def scale_points(points: List[Tuple[int, int]], scale_factor: float = 1.0):
    """Multiply every point coordinate by `scale_factor`, truncating to int."""
    scaled = []
    for pt in points:
        new_x = int(pt[0] * scale_factor)
        new_y = int(pt[1] * scale_factor)
        scaled.append((new_x, new_y))
    return scaled


def union_points(points: List[Tuple[int, int]]):
    """Return the (min_x, min_y, max_x, max_y) tuple enclosing all points."""
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]
    return (min(xs), min(ys), max(xs), max(ys))


def scale_box(box: List[int], scale_factor: float = 1.0):
    """Multiply every box coordinate by `scale_factor`, truncating to int."""
    scaled = []
    for coord in box:
        scaled.append(int(coord * scale_factor))
    return scaled


def box_h(box: List[int]):
    """Return box height (fourth coordinate minus second coordinate)."""
    top, bottom = box[1], box[3]
    return bottom - top


def box_w(box: List[int]):
    """Return box width (third coordinate minus first coordinate)."""
    left, right = box[0], box[2]
    return right - left


def box_area(box: List[int]):
    """Return box area, i.e. width times height of an [x1, y1, x2, y2] box."""
    left, top, right, bottom = box
    width = right - left
    height = bottom - top
    return width * height


def get_rect_iou(gt_box: List[tuple], pd_box: List[tuple], iou_type=0) -> float:
    """Intersection over union on layout rectangle

    Only the first and third points of each box are read, i.e. the
    (x1, y1) and (x2, y2) corners.

    Args:
        gt_box: List[tuple]
            A list contains bounding box coordinates of ground truth
        pd_box: List[tuple]
            A list contains bounding box coordinates of prediction
        iou_type: int
            0: intersection / union, normal IOU
            1: intersection / min(areas), useful when boxes are under/over-segmented

        Input format: [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
        Annotation for each element in bbox:
        (x1, y1)        (x2, y1)
            +-------+
            |       |
            |       |
            +-------+
        (x1, y2)        (x2, y2)

    Returns:
        Intersection over union value. For `iou_type=0`, 0.0 is returned
        when the union area is zero (both boxes are degenerate).
    """

    assert iou_type in [0, 1], "Only support 0: origin iou, 1: intersection / min(area)"

    # determine the (x, y)-coordinates of the intersection rectangle
    x_left = max(gt_box[0][0], pd_box[0][0])
    y_top = max(gt_box[0][1], pd_box[0][1])
    x_right = min(gt_box[2][0], pd_box[2][0])
    y_bottom = min(gt_box[2][1], pd_box[2][1])

    # compute the area of intersection rectangle (0 when boxes do not overlap)
    inter_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)

    # compute the area of both the prediction and ground-truth rectangles
    gt_area = (gt_box[2][0] - gt_box[0][0]) * (gt_box[2][1] - gt_box[0][1])
    pd_area = (pd_box[2][0] - pd_box[0][0]) * (pd_box[2][1] - pd_box[0][1])

    if iou_type == 0:
        # intersection / (sum of both areas - intersection)
        union_area = float(gt_area + pd_area - inter_area)
        if union_area == 0:
            # two zero-area boxes: nothing to overlap, avoid dividing by zero
            return 0.0
        return inter_area / union_area

    # iou_type == 1: intersection / smaller area, with the denominator
    # clamped to at least 1 so zero-area boxes do not divide by zero
    return inter_area / max(min(gt_area, pd_area), 1)


def sort_funsd_reading_order(lines: List[dict], box_key_name: str = "box"):
    """Sort items into reading order using their box locations.

    Repeatedly picks the current "top-left" item among the remaining ones and
    appends it to the result. The input list is left untouched; the selection
    works on a shallow copy.

    Args:
        lines: list of items to sort; each item holds an [x1, y1, x2, y2]
            box under `box_key_name`
        box_key_name: key under which each item stores its box

    Returns:
        a new list with the same items arranged in reading order
    """
    # Work on a copy so the caller's list is not emptied as items are picked.
    remaining = list(lines)
    sorted_list = []

    if len(remaining) == 0:
        return remaining

    while len(remaining) > 1:
        topleft_line = remaining[0]
        for line in remaining[1:]:
            topleft_line_pos = topleft_line[box_key_name]
            topleft_line_center_y = (topleft_line_pos[1] + topleft_line_pos[3]) / 2
            x1, y1, x2, y2 = line[box_key_name]
            box_center_x = (x1 + x2) / 2
            box_center_y = (y1 + y2) / 2
            cell_h = y2 - y1
            # candidate sits at least half its own height above the current pick
            if box_center_y <= topleft_line_center_y - cell_h / 2:
                topleft_line = line
                continue
            # candidate's center lies left of the current pick's right edge
            # and above its bottom edge
            if (
                box_center_x < topleft_line_pos[2]
                and box_center_y < topleft_line_pos[3]
            ):
                topleft_line = line
                continue
        sorted_list.append(topleft_line)
        remaining.remove(topleft_line)

    sorted_list.append(remaining[0])

    return sorted_list


================================================
FILE: libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
================================================
import json
import logging
from typing import Any, List

import requests
from decouple import config

logger = logging.getLogger(__name__)


def generate_gpt4v(
    endpoint: str,
    images: str | List[str],
    prompt: str,
    max_tokens: int = 512,
    max_images: int = 10,
) -> str:
    """Send a prompt plus images to a chat-completion endpoint, return the reply.

    The API key is read from `AZURE_OPENAI_API_KEY` and sent in the `api-key`
    header. At most `max_images` images are included in the request.

    Args:
        endpoint: URL the request is POSTed to
        images: one image URL (or data URL) or a list of them
        prompt: text part of the user message
        max_tokens: value of the `max_tokens` request field
        max_images: maximum number of images included in the request

    Returns:
        content of the first choice's message, or an empty string when the
        response status indicates an error.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": config("AZURE_OPENAI_API_KEY", default=""),
    }

    image_list = [images] if isinstance(images, str) else images

    # One text part followed by one image part per (kept) image.
    user_content = [{"type": "text", "text": prompt}]
    for image in image_list[:max_images]:
        user_content.append({"type": "image_url", "image_url": {"url": image}})

    payload = {
        "messages": [{"role": "user", "content": user_content}],
        "max_tokens": max_tokens,
        "temperature": 0,
    }

    if len(image_list) > max_images:
        print(f"Truncated to {max_images} images (original {len(image_list)} images")

    response = requests.post(endpoint, headers=headers, json=payload)

    try:
        response.raise_for_status()
    except Exception as e:
        logger.exception(f"Error generating gpt4v: {response.text}; error {e}")
        return ""

    return response.json()["choices"][0]["message"]["content"]


def stream_gpt4v(
    endpoint: str,
    images: str | List[str],
    prompt: str,
    max_tokens: int = 512,
    max_images: int = 10,
) -> Any:
    """Stream a chat-completion reply for a prompt plus images.

    This is a generator. For every received chunk that has at least one
    choice it yields `(content, logprobs)`, where `content` is the text delta
    of the first choice and `logprobs` the list of token log-probabilities in
    that chunk (empty when the chunk carries none).

    The API key is read from `AZURE_OPENAI_API_KEY` and sent in the `api-key`
    header. At most `max_images` images are included in the request.

    Args:
        endpoint: URL the request is POSTed to
        images: one image URL (or data URL) or a list of them
        prompt: text part of the user message
        max_tokens: value of the `max_tokens` request field
        max_images: maximum number of images included in the request

    Returns:
        On exhaustion the generator's return value is `(output, logprobs)`:
        the concatenated text and all log-probabilities. Both are reset to
        empty when an error occurred; errors are logged, not raised.
    """
    # OpenAI API Key
    api_key = config("AZURE_OPENAI_API_KEY", default="")
    headers = {"Content-Type": "application/json", "api-key": api_key}

    if isinstance(images, str):
        images = [images]

    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                ]
                + [
                    {
                        "type": "image_url",
                        "image_url": {"url": image},
                    }
                    for image in images[:max_images]
                ],
            }
        ],
        "max_tokens": max_tokens,
        "stream": True,
        "logprobs": True,
        "temperature": 0,
    }
    if len(images) > max_images:
        print(f"Truncated to {max_images} images (original {len(images)} images")
    try:
        response = requests.post(endpoint, headers=headers, json=payload, stream=True)
        assert response.status_code == 200, str(response.content)
        output = ""
        logprobs = []
        for line in response.iter_lines():
            if line:
                # Drop the 6-byte "data: " prefix; a line starting with a
                # UTF-8 BOM carries 3 extra leading bytes.
                if line.startswith(b"\xef\xbb\xbf"):
                    line = line[9:]
                else:
                    line = line[6:]
                # `line` is bytes here, so the end-of-stream marker must be
                # compared as bytes (a str comparison is never equal).
                if line.strip() == b"[DONE]":
                    break
                try:
                    line = json.loads(line.decode("utf-8"))
                except Exception:
                    # An undecodable chunk ends the stream.
                    break
                if len(line["choices"]):
                    if line["choices"][0].get("logprobs") is None:
                        _logprobs = []
                    else:
                        _logprobs = [
                            logprob["logprob"]
                            for logprob in line["choices"][0]["logprobs"].get(
                                "content", []
                            )
                        ]

                    # A chunk may carry `"content": null`; treat it as empty
                    # text instead of failing on str + None.
                    content = line["choices"][0]["delta"].get("content") or ""
                    output += content
                    logprobs += _logprobs
                    yield content, _logprobs

    except Exception as e:
        logger.error(f"Error streaming gpt4v {e}")
        logprobs = []
        output = ""

    return output, logprobs


================================================
FILE: libs/kotaemon/kotaemon/loaders/utils/pdf_ocr.py
================================================
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Union

from .box import (
    bbox_to_points,
    box_area,
    box_h,
    box_w,
    get_rect_iou,
    points_to_bbox,
    scale_box,
    scale_points,
    sort_funsd_reading_order,
    union_points,
)
from .table import table_cells_to_markdown

IOU_THRES = 0.5
PADDING_THRES = 1.1


def read_pdf_unstructured(input_path: Union[Path, str]):
    """Partition a document with `unstructured` and group its text by page.

    Args:
        input_path: path to input file

    Returns:
        A defaultdict keyed by zero-based page index. Each value is a list of
        dicts with the keys "text", "box" ([x1, y1, x2, y2]), "location"
        (the four corner points of the box) and "page_shape"
        ((width, height) of the element's coordinate system).

    Raises:
        ImportError: if `unstructured` cannot be imported.
    """
    try:
        from unstructured.partition.auto import partition
    except ImportError as e:
        raise ImportError(
            "Please install unstructured PDF reader `pip install unstructured[pdf]`: "
            f"{e}"
        )

    page_items = defaultdict(list)
    for element in partition(input_path):
        meta = element.metadata
        page_number = meta.page_number
        coords = meta.coordinates

        bbox = points_to_bbox(coords.points)
        page_shape = (coords.system.width, coords.system.height)

        # page_number is shifted by one to get a zero-based key
        entry = {
            "text": element.text,
            "box": bbox,
            "location": bbox_to_points(bbox),
            "page_shape": page_shape,
        }
        page_items[page_number - 1].append(entry)

    return page_items


def merge_ocr_and_pdf_texts(
    ocr_list: List[dict], pdf_text_list: List[dict], debug_info=None
):
    """Combine PDF text items with the OCR items that overlap none of them.

    An OCR item counts as matched when its intersection-over-min-area with
    any PDF item exceeds IOU_THRES. Unmatched OCR items get
    `"matched": False` set on them and are appended after the PDF items.

    Args:
        ocr_list: List of OCR items {"text", "box", "location"}
        pdf_text_list: List of PDF items {"text", "box", "location"}
        debug_info: optional `(cv2, image)` pair; when given, item
            rectangles are drawn onto the image

    Returns:
        Combined list of PDF text and non-overlap OCR text
    """
    draw_debug = debug_info is not None
    if draw_debug:
        cv2, debug_im = debug_info

    unmatched_ocr = []
    for ocr_item in ocr_list:
        overlaps_pdf = any(
            get_rect_iou(ocr_item["location"], pdf_item["location"], iou_type=1)
            > IOU_THRES
            for pdf_item in pdf_text_list
        )

        if overlaps_pdf:
            color = (255, 0, 0)
        else:
            ocr_item["matched"] = False
            unmatched_ocr.append(ocr_item)
            color = (0, 255, 255)

        if draw_debug:
            cv2.rectangle(
                debug_im,
                ocr_item["location"][0],
                ocr_item["location"][2],
                color=color,
                thickness=1,
            )

    if draw_debug:
        for pdf_item in pdf_text_list:
            cv2.rectangle(
                debug_im,
                pdf_item["location"][0],
                pdf_item["location"][2],
                color=(0, 255, 0),
                thickness=2,
            )

    return pdf_text_list + unmatched_ocr


def merge_table_cell_and_ocr(
    table_list: List[dict], ocr_list: List[dict], pdf_list: List[dict], debug_info=None
):
    """Merge table items with OCR text using IOU overlapping location

    Cells are assigned to tables, and each assigned cell collects the text
    items overlapping it under its "ocr" key. For every cell the PDF items
    are tried first; the OCR items are used only when the PDF match is empty
    or extends beyond the cell.

    Args:
        table_list: List of table items
            {"type": ("table", "cell", "text"), "text", "bbox", "location"}
            (the code reads the box of tables and cells from "bbox")
        ocr_list: List of OCR items {"text", "box", "location"}
        pdf_list: List of PDF items {"text", "box", "location"}
        debug_info: optional `(cv2, image)` pair; when given, rectangles for
            tables, cells and unmatched PDF items are drawn onto the image

    Returns:
        all_table_cells: List of tables, each of table is represented
            by list of cells with combined text from OCR
        not_matched_items: List of PDF text which is not overlapped by table region
    """
    # check for debug info
    if debug_info is not None:
        cv2, debug_im = debug_info

    # split the mixed input into cells and tables; other types are dropped
    cell_list = [item for item in table_list if item["type"] == "cell"]
    table_list = [item for item in table_list if item["type"] == "table"]

    # sort table by area (smallest first), so a cell is claimed by the
    # smallest table that overlaps it
    table_list = sorted(table_list, key=lambda item: box_area(item["bbox"]))

    all_tables = []
    # indices into pdf_list / cell_list that were already consumed
    matched_pdf_ids = []
    matched_cell_ids = []

    for table in table_list:
        if debug_info is not None:
            cv2.rectangle(
                debug_im,
                table["location"][0],
                table["location"][2],
                color=[0, 0, 255],
                thickness=5,
            )

        cur_table_cells = []
        for cell_id, cell in enumerate(cell_list):
            # each cell belongs to at most one table
            if cell_id in matched_cell_ids:
                continue

            # the cell must overlap the table and be strictly smaller than it
            if get_rect_iou(
                table["location"], cell["location"], iou_type=1
            ) > IOU_THRES and box_area(table["bbox"]) > box_area(cell["bbox"]):
                color = [128, 0, 128]
                # cell matched to table
                # first pass uses PDF items, second pass (fallback) OCR items
                for item_list, item_type in [(pdf_list, "pdf"), (ocr_list, "ocr")]:
                    # reset on every pass: a rejected PDF match is discarded
                    cell["ocr"] = []
                    for item_id, item in enumerate(item_list):
                        # a PDF item can only be consumed once
                        if item_type == "pdf" and item_id in matched_pdf_ids:
                            continue
                        if (
                            get_rect_iou(item["location"], cell["location"], iou_type=1)
                            > IOU_THRES
                        ):
                            cell["ocr"].append(item)
                            # NOTE: the PDF item is marked as consumed here,
                            # before the cell_okay check below, so it stays
                            # consumed even if this pass is rejected
                            if item_type == "pdf":
                                matched_pdf_ids.append(item_id)

                    if len(cell["ocr"]) > 0:
                        # check if union of matched ocr does
                        # not extend over cell boundary,
                        # if True, continue to use OCR_list to match
                        all_box_points_in_cell = []
                        for item in cell["ocr"]:
                            all_box_points_in_cell.extend(item["location"])
                        union_box = union_points(all_box_points_in_cell)
                        # PADDING_THRES gives the cell some slack in both
                        # height and width
                        cell_okay = (
                            box_h(union_box) <= box_h(cell["bbox"]) * PADDING_THRES
                            and box_w(union_box) <= box_w(cell["bbox"]) * PADDING_THRES
                        )
                    else:
                        cell_okay = False

                    if cell_okay:
                        # a different debug color marks cells filled from PDF text
                        if item_type == "pdf":
                            color = [255, 0, 255]
                        break

                if debug_info is not None:
                    cv2.rectangle(
                        debug_im,
                        cell["location"][0],
                        cell["location"][2],
                        color=color,
                        thickness=3,
                    )

                matched_cell_ids.append(cell_id)
                cur_table_cells.append(cell)

        all_tables.append(cur_table_cells)

    # PDF items never consumed by any cell are returned as non-table text
    not_matched_items = [
        item for _id, item in enumerate(pdf_list) if _id not in matched_pdf_ids
    ]
    if debug_info is not None:
        for item in not_matched_items:
            cv2.rectangle(
                debug_im,
                item["location"][0],
                item["location"][2],
                color=[128, 128, 128],
                thickness=3,
            )

    return all_tables, not_matched_items


def parse_ocr_output(
    ocr_page_items: List[dict],
    pdf_page_items: Dict[int, List[dict]],
    artifact_path: Optional[str] = None,
    debug_path: Optional[str] = None,
):
    """Main function to combine OCR output and PDF text to
    form list of table / non-table regions

    Note that the PDF items are rescaled in place to the OCR image size, and
    each OCR item gets a "box" key added.

    Args:
        ocr_page_items: List of OCR items by page; each page provides
            page["json"]["ocr"], page["json"]["table"], page["image_shape"]
            and, for debug mode, page["image"]
        pdf_page_items: Dict of PDF texts (page number as key)
        artifact_path: directory containing the page images named by
            page["image"]; required for debug mode
        debug_path: If specified (together with artifact_path), use OpenCV to
            plot debug image and save to debug_path

    Returns:
        all_tables: list of (page_id, markdown table text)
        all_texts: list of (page_id, joined non-table text), one per page

    Raises:
        ImportError: if debug mode is requested and OpenCV is not installed.
    """
    all_tables = []
    all_texts = []

    for page_id, page in enumerate(ocr_page_items):
        ocr_list = page["json"]["ocr"]
        table_list = page["json"]["table"]
        page_shape = page["image_shape"]
        pdf_item_list = pdf_page_items[page_id]

        # create bbox additional information
        for item in ocr_list:
            item["box"] = points_to_bbox(item["location"])

        # re-scale pdf items according to new image size
        for item in pdf_item_list:
            scale_factor = page_shape[0] / item["page_shape"][0]
            item["box"] = scale_box(item["box"], scale_factor=scale_factor)
            item["location"] = scale_points(item["location"], scale_factor=scale_factor)

        # if using debug mode, openCV must be installed
        if debug_path and artifact_path is not None:
            try:
                import cv2
            except ImportError:
                raise ImportError(
                    "Please install openCV first to use OCRReader debug mode"
                )
            image_path = Path(artifact_path) / page["image"]
            image = cv2.imread(str(image_path))
            debug_info = (cv2, image)
        else:
            debug_info = None

        new_pdf_list = merge_ocr_and_pdf_texts(
            ocr_list, pdf_item_list, debug_info=debug_info
        )

        # sort by reading order
        ocr_list = sort_funsd_reading_order(ocr_list)
        new_pdf_list = sort_funsd_reading_order(new_pdf_list)

        all_table_cells, non_table_text_list = merge_table_cell_and_ocr(
            table_list, ocr_list, new_pdf_list, debug_info=debug_info
        )

        table_texts = [table_cells_to_markdown(cells) for cells in all_table_cells]
        all_tables.extend([(page_id, text) for text in table_texts])
        all_texts.append(
            (page_id, " ".join(item["text"] for item in non_table_text_list))
        )

        # export debug image to debug_path
        # Guard on debug_info rather than debug_path: `cv2` and `image` only
        # exist when debug mode was actually set up above (it also needs
        # artifact_path), otherwise this would raise NameError.
        if debug_info is not None:
            cv2.imwrite(str(Path(debug_path) / "page_{}.png".format(page_id)), image)

    return all_tables, all_texts


================================================
FILE: libs/kotaemon/kotaemon/loaders/utils/table.py
================================================
import csv
from io import StringIO
from typing import List, Optional, Tuple

from .box import get_rect_iou


def check_col_conflicts(
    col_a: List[str], col_b: List[str], thres: float = 0.15
) -> bool:
    """Tell whether columns A and B clash too often to be merged
    (to be used with merge_cols)

    A clash is a row in which both columns hold non-empty content. The
    number of clashes is compared against `thres` times the number of
    non-empty cells in column A.

    Args:
        col_a: column A (list of str)
        col_b: column B (list of str)
        thres: percentage of overlapping allowed
    Returns:
        True if the number of clashing rows exceeds the threshold
    """
    filled_in_a = sum(1 for cell in col_a if cell)
    assert len(col_a) == len(col_b)

    clashes = sum(1 for cell_a, cell_b in zip(col_a, col_b) if cell_a and cell_b)
    return clashes > filled_in_a * thres


def merge_cols(col_a: List[str], col_b: List[str]) -> List[str]:
    """Append the non-empty cells of column B to column A, row by row.

    Column A is modified in place; each merged cell is the A text, a single
    space, then the B text.

    Args:
        col_a: column A (list of str)
        col_b: column B (list of str)
    Returns:
        merged column (the same list object as `col_a`)
    """
    for row_idx, text_a in enumerate(col_a):
        text_b = col_b[row_idx]
        if not text_b:
            continue
        col_a[row_idx] = text_a + " " + text_b
    return col_a


def add_index_col(csv_rows: List[List[str]]) -> List[List[str]]:
    """Prepend a "row id" column and an otherwise empty header row.

    The new first row is "row id" followed by one empty string per column of
    the first input row. Every input row is then numbered from 1.

    Args:
        csv_rows: input table
    Returns:
        output table with index column
    """
    header = ["row id"] + [""] * len(csv_rows[0])
    numbered = [[str(pos)] + row for pos, row in enumerate(csv_rows, start=1)]
    return [header] + numbered


def compress_csv(csv_rows: List[List[str]]) -> List[List[str]]:
    """Compress table csv_rows by merging sparse columns (merge_cols)

    Columns are scanned left to right. A column that does not conflict with
    the current anchor column (see check_col_conflicts) is folded into the
    anchor and dropped; a conflicting column becomes the new anchor.

    Args:
        csv_rows: input table
    Returns:
        output: compressed table
    """
    n_cols = len(csv_rows[0])
    columns = [[row[col_idx] for row in csv_rows] for col_idx in range(n_cols)]

    dropped = set()
    anchor = 0
    for col_idx in range(1, len(columns)):
        if check_col_conflicts(columns[anchor], columns[col_idx]):
            anchor = col_idx
            continue
        dropped.add(col_idx)
        columns[anchor] = merge_cols(columns[anchor], columns[col_idx])

    kept = [col for col_idx, col in enumerate(columns) if col_idx not in dropped]
    # transpose the surviving columns back into rows
    return [[col[row_idx] for col in kept] for row_idx in range(len(kept[0]))]


def get_table_from_ocr(ocr_list: List[dict], table_list: List[dict]):
    """Get list of text lines belong to table regions specified by table_list

    Only entries of `table_list` whose "type" is "table" are considered. An
    OCR item belongs to a table when its intersection-over-min-area with the
    table exceeds 0.8.

    Args:
        ocr_list: list of OCR output in Casia format (Flax)
        table_list: list of table output in Casia format (Flax)

    Returns:
        one list of OCR texts per table region, in table order
    """
    table_texts = []
    for region in table_list:
        if region["type"] == "table":
            texts_in_region = [
                ocr["text"]
                for ocr in ocr_list
                if get_rect_iou(region["location"], ocr["location"], iou_type=1) > 0.8
            ]
            table_texts.append(texts_in_region)

    return table_texts


def make_markdown_table(array: List[List[str]]) -> str:
    """Convert table rows in list format to markdown string

    The table is first compressed (compress_csv) and given an index column
    (add_index_col); the first row of the result is used as the header.

    Args:
        Python list with rows of table as lists
        First element as header.
        Example Input:
                [["Name", "Age", "Height"],
                ["Jake", 20, 5'10],
                ["Mary", 21, 5'7]]
    Returns:
        String to put into a .md file
    """
    table = add_index_col(compress_csv(array))
    header_cells = table[0]

    header_line = "| " + "".join(" " + str(cell) + " |" for cell in header_cells)
    divider_line = "| " + "--- | " * len(header_cells)
    body_lines = [
        "| " + "".join(str(cell) + " | " for cell in row) for row in table[1:]
    ]

    all_lines = [header_line, divider_line, *body_lines]
    return "\n" + "\n".join(all_lines) + "\n\n"


def parse_csv_string_to_list(csv_str: str) -> List[List[str]]:
    """Parse a comma-delimited CSV string into a list of rows.

    Args:
        csv_str: input CSV string

    Returns:
        Output table in list format
    """
    reader = csv.reader(StringIO(csv_str), delimiter=",")
    return list(reader)


def format_cell(cell: str, length_limit: Optional[int] = None) -> str:
    """Flatten newlines in a cell to spaces and optionally truncate it.

    Args:
        cell: input cell text
        length_limit: limit of text length. A falsy value (None or 0)
            disables truncation.

    Returns:
        new cell text
    """
    flattened = cell.replace("\n", " ")
    return flattened[:length_limit] if length_limit else flattened


def extract_tables_from_csv_string(
    csv_content: str, table_texts: List[List[str]]
) -> Tuple[List[str], str]:
    """Extract list of tables from FullOCR output
    (csv_content) with the specified table_texts

    A CSV row is assigned to a table when more than half of its non-empty
    cells contain at least one of that table's OCR texts (substring match).
    Each row is assigned to at most one table; earlier tables take priority.

    Args:
        csv_content: CSV output from FullOCR pipeline
        table_texts: list of table texts extracted
            from get_table_from_ocr()

    Returns:
        List of tables (as Markdown strings) and the non-table content,
        where the remaining rows are joined with newlines and their cells
        with spaces
    """
    rows = parse_csv_string_to_list(csv_content)
    used_row_ids = set()
    table_csv_list = []
    for table in table_texts:
        cur_rows = []
        for row_id, row in enumerate(rows):
            if row_id in used_row_ids:
                continue
            scores = [
                any(cell in cell_reference for cell in table)
                for cell_reference in row
                if cell_reference
            ]
            # Blank CSV lines parse to an empty row, and a row may consist of
            # empty cells only. Such rows have nothing to match on; skipping
            # them avoids a division by zero and leaves them as non-table text.
            if not scores:
                continue
            if sum(scores) / len(scores) > 0.5:
                used_row_ids.add(row_id)
                cur_rows.append([format_cell(cell) for cell in row])
        if cur_rows:
            table_csv_list.append(make_markdown_table(cur_rows))
        else:
            print("table not matched", table)

    non_table_rows = [
        row for row_id, row in enumerate(rows) if row_id not in used_row_ids
    ]
    non_table_text = "\n".join(
        " ".join(format_cell(cell) for cell in row) for row in non_table_rows
    )
    return table_csv_list, non_table_text


def strip_special_chars_markdown(text: str) -> str:
    """Remove Markdown table markup (pipes and divider dashes) from the text"""
    # Order matters: ":---:" has to be removed before the bare "---".
    for token in ("|", ":---:", "---"):
        text = text.replace(token, "")
    return text


def parse_markdown_text_to_tables(text: str) -> Tuple[List[str], List[str]]:
    """Convert markdown text to list of table spans and non-table spans

    Lines are stripped; a line starting with ``|`` belongs to a table, any
    other line belongs to a text span. Consecutive lines of the same kind are
    grouped into one span.

    Args:
        text: input markdown text

    Returns:
        list of table spans and list of non-table spans (in that order),
        each span being its lines joined with newlines
    """
    tables: List[List[str]] = []
    texts: List[List[str]] = []

    cur_table: List[str] = []
    cur_text: List[str] = []
    for line in text.split("\n"):
        line = line.strip()
        if line.startswith("|"):
            # a table starts: close the text span collected so far
            if cur_text:
                texts.append(cur_text)
                cur_text = []
            cur_table.append(line)
        else:
            # a table ends: add it to the list
            if cur_table:
                tables.append(cur_table)
                cur_table = []
            cur_text.append(line)

    # Flush whatever is still open at the end of the input. Without this, a
    # table on the last line and any text after the last table are lost.
    if cur_table:
        tables.append(cur_table)
    # A trailing span made only of blank lines (e.g. from a final newline or
    # empty input) carries no content and is not emitted.
    if any(cur_text):
        texts.append(cur_text)

    table_texts = ["\n".join(table) for table in tables]
    non_table_texts = ["\n".join(span) for span in texts]
    return table_texts, non_table_texts


def table_cells_to_markdown(cells: List[dict]):
    """Lay out OCR'd table cells on a grid and render it as a Markdown table

    Each cell dict is read for ``"rows"`` and ``"columns"`` (a start/end index
    pair each) and ``"ocr"`` (items whose ``"text"`` values are joined with
    spaces). Returns an empty string when there are no cells.
    """
    if len(cells) < 1:
        return ""

    row_indices = []
    col_indices = []
    for cell in cells:
        row_indices += cell["rows"]
        col_indices += cell["columns"]

    n_rows = max(row_indices) + 1
    n_cols = max(col_indices) + 1
    grid = [[""] * n_cols for _ in range(n_rows)]

    for cell in cells:
        text = " ".join(item["text"] for item in cell["ocr"])
        row_start, row_end = cell["rows"]
        col_start, col_end = cell["columns"]
        is_span = (row_start, col_start) != (row_end, col_end)

        # Only short texts of spanning cells below the first row are repeated
        # over the whole span; long texts are written once to limit the
        # context length.
        if is_span and len(text.replace(" ", "")) < 20 and row_start > 0:
            targets = [
                (r, c)
                for r in range(row_start, row_end + 1)
                for c in range(col_start, col_end + 1)
            ]
        else:
            targets = [(row_start, col_start)]

        for r, c in targets:
            grid[r][c] += text + " "

    return make_markdown_table(grid)


================================================
FILE: libs/kotaemon/kotaemon/loaders/web_loader.py
================================================
from pathlib import Path
from typing import Optional

import requests
from decouple import config

from kotaemon.base import Document

from .base import BaseReader

# Optional Jina API key read from configuration; empty string when not set.
JINA_API_KEY = config("JINA_API_KEY", default="")
# Base URL of the Jina Reader endpoint, overridable through configuration.
JINA_URL = config("JINA_URL", default="https://r.jina.ai/")


class WebReader(BaseReader):
    """Reader that fetches the content of a web page through the Jina Reader
    endpoint configured in ``JINA_URL``"""

    def run(
        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Fetch the page at ``file_path`` (a URL) and return it as documents

        Args:
            file_path: URL of the page to read
            extra_info: metadata to attach to the returned document

        Returns:
            A single-element list with the fetched document
        """
        # Pass the value through untouched: wrapping a URL in `Path` collapses
        # the "//" after the scheme ("https://x" becomes "https:/x").
        return self.load_data(file_path, extra_info=extra_info, **kwargs)

    def fetch_url(self, url: str):
        """Request `url` through the Jina Reader endpoint and return the
        response body as text

        Raises:
            requests.HTTPError: if the endpoint answers with an error status
        """
        # setup the request; honour the configured endpoint and tolerate a
        # configured value with or without a trailing slash
        api_url = f"{JINA_URL.rstrip('/')}/{url}"
        headers = {
            "X-With-Links-Summary": "true",
        }
        if JINA_API_KEY:
            headers["Authorization"] = f"Bearer {JINA_API_KEY}"

        response = requests.get(api_url, headers=headers)
        response.raise_for_status()

        return response.text

    def load_data(
        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
    ) -> list[Document]:
        """Fetch the page at ``file_path`` and wrap its text in a Document

        Args:
            file_path: URL of the page to read
            extra_info: metadata to attach to the document (defaults to an
                empty dict)

        Returns:
            A single-element list with the fetched document
        """
        file_path = str(file_path)
        output = self.fetch_url(file_path)
        metadata = extra_info or {}

        return [Document(text=output, metadata=metadata)]


================================================
FILE: libs/kotaemon/kotaemon/parsers/__init__.py
================================================
from .regex_extractor import FirstMatchRegexExtractor, RegexExtractor

__all__ = ["RegexExtractor", "FirstMatchRegexExtractor"]


================================================
FILE: libs/kotaemon/kotaemon/parsers/regex_extractor.py
================================================
from __future__ import annotations

import re
from typing import Callable

from kotaemon.base import BaseComponent, Document, ExtractorOutput, Param


class RegexExtractor(BaseComponent):
    """
    Component that pulls substrings out of text with regular expressions.

    Args:
        pattern (List[str]): The regex pattern(s) to apply. A single string
            is accepted and wrapped into a list.
        output_map (dict, optional): Mapping (or callable) applied to every
            extracted match. Defaults to an empty dict, i.e. no mapping.
    """

    class Config:
        middleware_switches = {"theflow.middleware.CachingMiddleware": False}

    pattern: list[str]
    output_map: dict[str, str] | Callable[[str], str] = Param(
        default_callback=lambda *_: {}
    )

    def __init__(self, pattern: str | list[str], **kwargs):
        patterns = [pattern] if isinstance(pattern, str) else pattern
        super().__init__(pattern=patterns, **kwargs)

    @staticmethod
    def run_raw_static(pattern: str, text: str) -> list[str]:
        """
        Return every non-overlapping match of `pattern` in `text`.

        Parameters:
            pattern (str): The regular expression to search for.
            text (str): The string to search in.

        Returns:
            List[str]: The matches, as produced by `re.findall`.
        """
        return re.findall(pattern, text)

    @staticmethod
    def map_output(text, output_map) -> str:
        """
        Translate `text` through `output_map`.

        Parameters:
            text (str): The extracted text to translate.
            output_map (dict | Callable): A dictionary from extracted text to
                output value, or a callable computing the output value.

        Returns:
            str: `text` itself when `output_map` is falsy or is a dictionary
                without `text` as a key; otherwise the mapped value.
        """
        if not output_map:
            mapped = text
        elif isinstance(output_map, dict):
            mapped = output_map.get(text, text)
        else:
            mapped = output_map(text)
        return mapped

    def run_raw(self, text: str) -> ExtractorOutput:
        """
        Apply every pattern to the raw text, run the output mapping on the
        matches and wrap them in an ExtractorOutput.

        Args:
            text (str): The raw text to be processed.

        Returns:
            ExtractorOutput: All mapped matches (patterns in order), with the
                first match as `text`, or "" when nothing matched.
        """
        found: list[str] = [
            match
            for regex in self.pattern
            for match in self.run_raw_static(regex, text)
        ]
        mapped = [self.map_output(match, self.output_map) for match in found]
        first = mapped[0] if mapped else ""

        return ExtractorOutput(
            text=first,
            matches=mapped,
            metadata={"origin": "RegexExtractor"},
        )

    def run(
        self, text: str | list[str] | Document | list[Document]
    ) -> list[ExtractorOutput]:
        """Run the extraction on one or several inputs

        Parameters:
            text: a string, a Document, or a list of either

        Returns:
            A list with one ExtractorOutput per input, in input order

        Raises:
            ValueError: if an input is neither a str nor a Document

        Example:
            ```pycon
            >>> document1 = Document(...)
            >>> document2 = Document(...)
            >>> document_batch = [document1, document2]
            >>> batch_output = self(document_batch)
            >>> print(batch_output)
            [output1_document1, output1_document2]
            ```
        """
        # TODO: this conversion seems common
        items = text if isinstance(text, list) else [text]

        # Validate and normalise every input before running any extraction.
        input_: list[str] = []
        for item in items:
            if isinstance(item, str):
                input_.append(item)
            elif isinstance(item, Document):
                input_.append(item.text)
            else:
                raise ValueError(
                    f"Invalid input type {type(item)}, should be str or Document"
                )

        return [self.run_raw(raw_text) for raw_text in input_]


class FirstMatchRegexExtractor(RegexExtractor):
    """RegexExtractor variant that stops at the first pattern that matches"""

    pattern: list[str]

    def run_raw(self, text: str) -> ExtractorOutput:
        """Return the mapped matches of the first pattern that matches `text`

        Patterns are tried in order. When none matches, the output has
        `text=None` and an empty `matches` list.
        """
        origin = {"origin": "FirstMatchRegexExtractor"}

        for regex in self.pattern:
            found = self.run_raw_static(regex, text)
            if not found:
                continue
            mapped = [self.map_output(match, self.output_map) for match in found]
            return ExtractorOutput(text=mapped[0], matches=mapped, metadata=origin)

        return ExtractorOutput(text=None, matches=[], metadata=origin)


================================================
FILE: libs/kotaemon/kotaemon/rerankings/__init__.py
================================================
from .base import BaseReranking
from .cohere import CohereReranking
from .tei_fast_rerank import TeiFastReranking
from .voyageai import VoyageAIReranking

__all__ = ["BaseReranking", "TeiFastReranking", "CohereReranking", "VoyageAIReranking"]


================================================
FILE: libs/kotaemon/kotaemon/rerankings/base.py
================================================
from __future__ import annotations

from abc import abstractmethod

from kotaemon.base import BaseComponent, Document


class BaseReranking(BaseComponent):
    """Abstract base for components that transform a list of documents
    with respect to a query (re-ranking, filtering, etc)"""

    @abstractmethod
    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Main method to transform list of documents
        (re-ranking, filtering, etc)

        Args:
            documents: the candidate documents
            query: the query the documents are evaluated against

        Returns:
            The transformed list of documents
        """
        ...


================================================
FILE: libs/kotaemon/kotaemon/rerankings/cohere.py
================================================
from __future__ import annotations

import os

from decouple import config

from kotaemon.base import Document, Param

from .base import BaseReranking


class CohereReranking(BaseReranking):
    """Reranker backed by the Cohere rerank API"""

    model_name: str = Param(
        "rerank-multilingual-v2.0",
        help=(
            "ID of the model to use. You can go to [Supported Models]"
            "(https://docs.cohere.com/docs/rerank-2) to see the supported models"
        ),
        required=True,
    )
    cohere_api_key: str = Param(
        config("COHERE_API_KEY", ""),
        help="Cohere API key",
        required=True,
    )
    base_url: str = Param(
        None,
        help="Rerank API base url. Default is https://api.cohere.com",
        required=False,
    )

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Re-order documents with the Cohere reranker and record each
        relevance score under ``metadata["reranking_score"]``

        The input list is returned untouched when no usable API key is
        configured; an empty input yields an empty list without an API call.
        """
        try:
            import cohere
        except ImportError:
            raise ImportError(
                "Please install Cohere `pip install cohere` to use Cohere Reranking"
            )

        api_key = self.cohere_api_key
        # An empty key, or one still containing the "COHERE_API_KEY"
        # placeholder text, counts as not configured.
        if not api_key or "COHERE_API_KEY" in api_key:
            print("Cohere API key not found. Skipping rerankings.")
            return documents

        client = cohere.Client(
            api_key, base_url=self.base_url or os.getenv("CO_API_URL")
        )
        reranked: list[Document] = []

        if not documents:  # to avoid empty api call
            return reranked

        contents = [document.content for document in documents]
        response = client.rerank(
            model=self.model_name, query=query, documents=contents
        )
        for result in response.results:
            hit = documents[result.index]
            hit.metadata["reranking_score"] = result.relevance_score
            reranked.append(hit)

        return reranked


================================================
FILE: libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py
================================================
from __future__ import annotations

from typing import Optional

import requests

from kotaemon.base import Document, Param

from .base import BaseReranking

# Module-level HTTP session used by TeiFastReranking.client for its POST calls.
session = requests.session()


class TeiFastReranking(BaseReranking):
    """Text Embeddings Inference (TEI) Reranking model
    (https://huggingface.co/docs/text-embeddings-inference/en/index)
    """

    endpoint_url: str = Param(
        None, help="TEI Reranking service api base URL", required=True
    )
    model_name: Optional[str] = Param(
        None,
        help=(
            "ID of the model to use. You can go to [Supported Models]"
            "(https://github.com/huggingface"
            "/text-embeddings-inference?tab=readme-ov-file"
            "#supported-models) to see the supported models"
        ),
    )
    is_truncated: Optional[bool] = Param(True, help="Whether to truncate the inputs")
    max_tokens: Optional[int] = Param(
        512,
        help=(
            "This option is used to specify the "
            "maximum number of tokens supported by the reranker model."
        ),
    )

    def client(self, query, texts):
        """POST one batch of texts to the reranking endpoint

        Args:
            query: the query to score the texts against
            texts: the candidate texts

        Returns:
            The decoded JSON body of the response
        """
        if self.is_truncated:
            # NOTE: the cut is a plain string slice, so `max_tokens` acts as
            # a character limit here (default is 512).
            max_tokens = self.max_tokens
            texts = [text[:max_tokens] for text in texts]
        # When truncation is disabled the texts are sent unchanged; the
        # payload no longer depends on a variable set only in the branch above.

        response = session.post(
            url=self.endpoint_url,
            json={
                "query": query,
                "texts": texts,
                "is_truncated": self.is_truncated,  # default is True
            },
        ).json()
        return response

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Use the deployed TEI rerankings service to re-order documents
        with their relevance score

        Scores are stored under ``metadata["reranking_score"]`` and the result
        is sorted by descending score. The input is returned untouched when no
        endpoint URL is configured.
        """
        if not self.endpoint_url:
            print("TEI API reranking URL not found. Skipping rerankings.")
            return documents

        compressed_docs: list[Document] = []

        if not documents:  # to avoid empty api call
            return compressed_docs

        if isinstance(documents[0], str):
            # `prepare_input` is not defined in this class; presumably it is
            # inherited and wraps raw strings into Documents - TODO confirm.
            documents = self.prepare_input(documents)

        batch_size = 6
        # The last batch takes all remaining documents, so it can hold more
        # than `batch_size` items.
        num_batch = max(len(documents) // batch_size, 1)
        for i in range(num_batch):
            if i == num_batch - 1:
                mini_batch = documents[batch_size * i :]
            else:
                mini_batch = documents[batch_size * i : batch_size * (i + 1)]

            _docs = [d.content for d in mini_batch]
            rerank_resp = self.client(query, _docs)
            for r in rerank_resp:
                # `index` is relative to the batch that was sent
                doc = mini_batch[r["index"]]
                doc.metadata["reranking_score"] = r["score"]
                compressed_docs.append(doc)

        compressed_docs = sorted(
            compressed_docs, key=lambda x: x.metadata["reranking_score"], reverse=True
        )
        return compressed_docs


================================================
FILE: libs/kotaemon/kotaemon/rerankings/voyageai.py
================================================
from __future__ import annotations

import importlib

from decouple import config

from kotaemon.base import Document, Param

from .base import BaseReranking

# Cached `voyageai` module; stays None until the first call below.
vo = None


def _import_voyageai():
    """Import the `voyageai` package on first use and return the cached module"""
    global vo
    vo = vo or importlib.import_module("voyageai")
    return vo


class VoyageAIReranking(BaseReranking):
    """VoyageAI Reranking model"""

    model_name: str = Param(
        "rerank-2",
        help=(
            "ID of the model to use. You can go to [Supported Models]"
            "(https://docs.voyageai.com/docs/reranker) to see the supported models"
        ),
        required=True,
    )
    api_key: str = Param(
        config("VOYAGE_API_KEY", ""),
        help="VoyageAI API key",
        required=True,
    )

    def __init__(self, *args, **kwargs):
        """Create the sync and async VoyageAI clients

        Raises:
            ValueError: if no API key is configured
        """
        super().__init__(*args, **kwargs)
        if not self.api_key:
            # Name this class in the message (it previously named the
            # embeddings class, which sent users looking in the wrong place).
            raise ValueError("API key must be provided for VoyageAIReranking.")

        self._client = _import_voyageai().Client(api_key=self.api_key)
        self._aclient = _import_voyageai().AsyncClient(api_key=self.api_key)

    def run(self, documents: list[Document], query: str) -> list[Document]:
        """Use VoyageAI Reranker model to re-order documents
        with their relevance score

        Documents are returned in the order of the API results, with each
        score stored under ``metadata["reranking_score"]``. An empty input
        yields an empty list without an API call.
        """
        compressed_docs: list[Document] = []

        if not documents:  # to avoid empty api call
            return compressed_docs

        _docs = [d.content for d in documents]
        response = self._client.rerank(
            model=self.model_name, query=query, documents=_docs
        )
        for r in response.results:
            # `r.index` points back into the list that was submitted
            doc = documents[r.index]
            doc.metadata["reranking_score"] = r.relevance_score
            compressed_docs.append(doc)

        return compressed_docs


================================================
FILE: libs/kotaemon/kotaemon/storages/__init__.py
================================================
from .docstores import (
    BaseDocumentStore,
    ElasticsearchDocumentStore,
    InMemoryDocumentStore,
    LanceDBDocumentStore,
    SimpleFileDocumentStore,
)
from .vectorstores import (
    BaseVectorStore,
    ChromaVectorStore,
    InMemoryVectorStore,
    LanceDBVectorStore,
    MilvusVectorStore,
    QdrantVectorStore,
    SimpleFileVectorStore,
)

__all__ = [
    # Document stores
    "BaseDocumentStore",
    "InMemoryDocumentStore",
    "ElasticsearchDocumentStore",
    "SimpleFileDocumentStore",
    "LanceDBDocumentStore",
    # Vector stores
    "BaseVectorStore",
    "ChromaVectorStore",
    "InMemoryVectorStore",
    "SimpleFileVectorStore",
    "LanceDBVectorStore",
    "MilvusVectorStore",
    "QdrantVectorStore",
]


================================================
FILE: libs/kotaemon/kotaemon/storages/docstores/__init__.py
================================================
from .base import BaseDocumentStore
from .elasticsearch import ElasticsearchDocumentStore
from .in_memory import InMemoryDocumentStore
from .lancedb import LanceDBDocumentStore
from .simple_file import SimpleFileDocumentStore

__all__ = [
    "BaseDocumentStore",
    "InMemoryDocumentStore",
    "ElasticsearchDocumentStore",
    "SimpleFileDocumentStore",
    "LanceDBDocumentStore",
]


================================================
FILE: libs/kotaemon/kotaemon/storages/docstores/base.py
================================================
from abc import ABC, abstractmethod
from typing import List, Optional, Union

from kotaemon.base import Document


class BaseDocumentStore(ABC):
    """A document store is in charge of storing and managing documents"""

    @abstractmethod
    def __init__(self, *args, **kwargs):
        ...

    @abstractmethod
    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add document into document store

        Args:
            docs: Document or list of documents
            ids: List of ids of the documents. Optional, if not set will use doc.doc_id
        """
        ...

    @abstractmethod
    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id

        Args:
            ids: a single id or a list of ids
        """
        ...

    @abstractmethod
    def get_all(self) -> List[Document]:
        """Get all documents"""
        ...

    @abstractmethod
    def count(self) -> int:
        """Count number of documents"""
        ...

    @abstractmethod
    def query(
        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
    ) -> List[Document]:
        """Search document store using search query

        Args:
            query: the search text
            top_k: number of documents to return
            doc_ids: optional list of document ids; presumably restricts the
                search to these documents (each implementation decides)
        """
        ...

    @abstractmethod
    def delete(self, ids: Union[List[str], str]):
        """Delete document by id

        Args:
            ids: a single id or a list of ids
        """
        ...

    @abstractmethod
    def drop(self):
        """Drop the document store"""
        ...


================================================
FILE: libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
================================================
from typing import List, Optional, Union

from kotaemon.base import Document

from .base import BaseDocumentStore

# Number of hits requested from Elasticsearch when fetching all documents.
MAX_DOCS_TO_GET = 10**4


class ElasticsearchDocumentStore(BaseDocumentStore):
    """Document store backed by an Elasticsearch index, searched with BM25"""

    def __init__(
        self,
        collection_name: str = "docstore",
        elasticsearch_url: str = "http://localhost:9200",
        k1: float = 2.0,
        b: float = 0.75,
        **kwargs,
    ):
        """Connect to Elasticsearch and create the index if it is missing

        Args:
            collection_name: name of the Elasticsearch index to use
            elasticsearch_url: URL of the Elasticsearch server
            k1: `k1` parameter of the custom BM25 similarity
            b: `b` parameter of the custom BM25 similarity
            **kwargs: forwarded to the `Elasticsearch` client constructor

        Raises:
            ImportError: if the `elasticsearch` package is not installed
        """
        try:
            from elasticsearch import Elasticsearch
            from elasticsearch.helpers import bulk
        except ImportError:
            raise ImportError(
                "To use ElaticsearchDocstore please install `pip install elasticsearch`"
            )

        self.elasticsearch_url = elasticsearch_url
        self.index_name = collection_name
        self.k1 = k1
        self.b = b

        # Create an Elasticsearch client instance
        self.client = Elasticsearch(elasticsearch_url, **kwargs)
        self.es_bulk = bulk
        # Define the index settings and mappings
        settings = {
            "analysis": {"analyzer": {"default": {"type": "standard"}}},
            "similarity": {
                "custom_bm25": {
                    "type": "BM25",
                    "k1": k1,
                    "b": b,
                }
            },
        }
        mappings = {
            "properties": {
                "content": {
                    "type": "text",
                    "similarity": "custom_bm25",  # Use the custom BM25 similarity
                }
            }
        }

        # Create the index with the specified settings and mappings
        if not self.client.indices.exists(index=self.index_name):
            self.client.indices.create(
                index=self.index_name, mappings=mappings, settings=settings
            )

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        refresh_indices: bool = True,
        **kwargs,
    ):
        """Add document into document store

        Documents are sent as bulk "index" operations, so an existing document
        with the same id is replaced.

        Args:
            docs: list of documents to add
            ids: specify the ids of documents to add or use existing doc.doc_id
            refresh_indices: request Elasticsearch to update its index (default to True)
        """
        if ids and not isinstance(ids, list):
            ids = [ids]
        if not isinstance(docs, list):
            docs = [docs]
        doc_ids = ids if ids else [doc.doc_id for doc in docs]

        requests = []
        for doc_id, doc in zip(doc_ids, docs):
            text = doc.text
            metadata = doc.metadata
            request = {
                "_op_type": "index",
                "_index": self.index_name,
                "content": text,
                "metadata": metadata,
                "_id": doc_id,
            }
            requests.append(request)

        success, failed = self.es_bulk(self.client, requests)
        print("Added/Updated documents to index", success)
        print("Failed documents to index", failed)

        if refresh_indices:
            self.client.indices.refresh(index=self.index_name)

    def query_raw(self, query: dict) -> List[Document]:
        """Query Elasticsearch store using query format of ES client

        Args:
            query (dict): Elasticsearch query format

        Returns:
            List[Document]: List of result documents
        """
        res = self.client.search(index=self.index_name, body=query)
        docs = []
        for r in res["hits"]["hits"]:
            docs.append(
                Document(
                    id_=r["_id"],
                    text=r["_source"]["content"],
                    metadata=r["_source"]["metadata"],
                )
            )
        return docs

    def query(
        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
    ) -> List[Document]:
        """Search Elasticsearch docstore using search query (BM25)

        Args:
            query (str): query text
            top_k (int, optional): number of
                top documents to return. Defaults to 10.
            doc_ids (list, optional): when given, only documents with
                these ids are searched.

        Returns:
            List[Document]: List of result documents
        """
        query_dict: dict = {"match": {"content": query}}
        if doc_ids is not None:
            query_dict = {"bool": {"must": [query_dict, {"terms": {"_id": doc_ids}}]}}
        query_dict = {"query": query_dict, "size": top_k}
        return self.query_raw(query_dict)

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id"""
        if not isinstance(ids, list):
            ids = [ids]
        # Same cap as get_all(), kept in one place instead of a literal 10000
        query_dict = {"query": {"terms": {"_id": ids}}, "size": MAX_DOCS_TO_GET}
        return self.query_raw(query_dict)

    def count(self) -> int:
        """Count number of documents"""
        count = int(
            self.client.cat.count(index=self.index_name, format="json")[0]["count"]
        )
        return count

    def get_all(self) -> List[Document]:
        """Get all documents (at most MAX_DOCS_TO_GET of them)"""
        query_dict = {"query": {"match_all": {}}, "size": MAX_DOCS_TO_GET}
        return self.query_raw(query_dict)

    def delete(self, ids: Union[List[str], str]):
        """Delete document by id"""
        if not isinstance(ids, list):
            ids = [ids]

        query = {"query": {"terms": {"_id": ids}}}
        self.client.delete_by_query(index=self.index_name, body=query)
        self.client.indices.refresh(index=self.index_name)

    def drop(self):
        """Drop the document store by deleting its index"""
        # No refresh afterwards: the index no longer exists, and refreshing a
        # missing index makes Elasticsearch answer with a not-found error.
        self.client.indices.delete(index=self.index_name)

    def __persist_flow__(self):
        # NOTE(review): the key is `index_name` while `__init__` takes
        # `collection_name` - confirm how the persisted dict is consumed.
        return {
            "index_name": self.index_name,
            "elasticsearch_url": self.elasticsearch_url,
            "k1": self.k1,
            "b": self.b,
        }


================================================
FILE: libs/kotaemon/kotaemon/storages/docstores/in_memory.py
================================================
import json
from pathlib import Path
from typing import List, Optional, Union

from kotaemon.base import Document

from .base import BaseDocumentStore


class InMemoryDocumentStore(BaseDocumentStore):
    """Document store that keeps its documents in a plain in-memory dict"""

    def __init__(self):
        self._store = {}

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Store one or several documents

        Args:
            docs: a document or a list of documents to add
            ids: id(s) to store the documents under; when omitted, each
                document's own doc.doc_id is used
            exist_ok: when False (the default), raise ValueError if an id
                is already present; when True, overwrite the stored document
        """
        exist_ok: bool = kwargs.pop("exist_ok", False)

        id_list = [ids] if ids and not isinstance(ids, list) else ids
        doc_list = docs if isinstance(docs, list) else [docs]
        keys = id_list if id_list else [doc.doc_id for doc in doc_list]

        for key, doc in zip(keys, doc_list):
            if key in self._store and not exist_ok:
                raise ValueError(f"Document with id {key} already exist")
            self._store[key] = doc

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Return the documents stored under the given id(s)"""
        wanted = ids if isinstance(ids, list) else [ids]
        return [self._store[key] for key in wanted]

    def get_all(self) -> List[Document]:
        """Return every stored document"""
        return list(self._store.values())

    def count(self) -> int:
        """Return the number of stored documents"""
        return len(self._store)

    def delete(self, ids: Union[List[str], str]):
        """Remove the documents stored under the given id(s)"""
        doomed = ids if isinstance(ids, list) else [ids]
        for key in doomed:
            del self._store[key]

    def save(self, path: Union[str, Path]):
        """Write all documents to `path` as JSON"""
        # Serialise first, so a failing `to_dict` does not truncate the file.
        serialised = {key: doc.to_dict() for key, doc in self._store.items()}
        with open(path, "w") as fh:
            json.dump(serialised, fh)

    def load(self, path: Union[str, Path]):
        """Replace the store content with the documents saved at `path`"""
        with open(path) as fh:
            raw = json.load(fh)
        # TODO: save and load aren't lossless. A Document-subclass will lose
        # information. Need to edit the `to_dict` and `from_dict` methods in
        # the Document class.
        # For better query support, utilize SQLite as the default document store.
        # Also, for portability, use SQLAlchemy for document store.
        self._store = {key: Document.from_dict(value) for key, value in raw.items()}

    def query(
        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
    ) -> List[Document]:
        """Full-text search is not implemented here: always returns []"""
        return []

    def __persist_flow__(self):
        return {}

    def drop(self):
        """Empty the document store"""
        self._store = {}


================================================
FILE: libs/kotaemon/kotaemon/storages/docstores/lancedb.py
================================================
import json
from typing import List, Optional, Union

from kotaemon.base import Document

from .base import BaseDocumentStore

# Cap on the number of documents requested in one fetch - presumably used by
# the retrieval methods further down in this module; TODO confirm.
MAX_DOCS_TO_GET = 10**4


class LanceDBDocumentStore(BaseDocumentStore):
    """LanceDB document store which supports full-text search queries.

    Every document is stored as one row with three columns: `id`, `text` and
    `attributes` (the document metadata serialized as a JSON string).
    """

    def __init__(self, path: str = "lancedb", collection_name: str = "docstore"):
        """Connect to the LanceDB database located at `path`.

        Args:
            path: URI handed to `lancedb.connect`.
            collection_name: name of the table holding the documents.

        Raises:
            ImportError: if the `lancedb` package is not installed.
        """
        try:
            import lancedb
        except ImportError as exc:
            raise ImportError(
                "Please install lancedb: 'pip install lancedb tantivy'"
            ) from exc

        self.db_uri = path
        self.collection_name = collection_name
        self.db_connection = lancedb.connect(self.db_uri)  # type: ignore

    @staticmethod
    def _build_id_filter(ids) -> str:
        """Build the `id in (...)` filter expression for the given ids."""
        # single quotes inside an id are doubled so they cannot terminate the
        # quoted literal and corrupt the filter expression
        quoted = ", ".join(
            "'{}'".format(str(_id).replace("'", "''")) for _id in ids
        )
        return f"id in ({quoted})"

    @staticmethod
    def _row_to_document(row) -> Document:
        """Convert one table row (a dict) back into a Document."""
        return Document(
            id_=row["id"],
            text=row["text"] if row["text"] else "<empty>",
            metadata=json.loads(row["attributes"]),
        )

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        refresh_indices: bool = True,
        **kwargs,
    ):
        """Load documents into lancedb storage.

        Args:
            docs: a document or a list of documents to store.
            ids: id (or list of ids) to store the documents under; when
                omitted or empty, each document's own `doc_id` is used.
            refresh_indices: rebuild the full-text index on the `text`
                column after the documents are written.
            **kwargs: accepted and ignored.
        """
        # accept the single-item forms advertised by the signature
        if isinstance(docs, Document):
            docs = [docs]
        if isinstance(ids, str):
            ids = [ids]

        doc_ids = ids if ids else [doc.doc_id for doc in docs]
        data = [
            {
                "id": doc_id,
                "text": doc.text,
                "attributes": json.dumps(doc.metadata),
            }
            for doc_id, doc in zip(doc_ids, docs)
        ]

        if self.collection_name not in self.db_connection.table_names():
            if not data:
                # no table and nothing to write: there is nothing to create
                # and therefore nothing to index either
                return
            document_collection = self.db_connection.create_table(
                self.collection_name, data=data, mode="overwrite"
            )
        else:
            # add data to existing table
            document_collection = self.db_connection.open_table(self.collection_name)
            if data:
                document_collection.add(data)

        if refresh_indices:
            document_collection.create_fts_index(
                "text",
                tokenizer_name="en_stem",
                replace=True,
            )

    def query(
        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
    ) -> List[Document]:
        """Run a full-text search over the stored documents.

        Args:
            query: the search text.
            top_k: maximum number of documents to return.
            doc_ids: when given and non-empty, restrict the search to
                documents with these ids.

        Returns:
            The matching documents. An empty list is returned when opening
            the table or searching raises ValueError or FileNotFoundError.
        """
        query_filter = self._build_id_filter(doc_ids) if doc_ids else None
        try:
            document_collection = self.db_connection.open_table(self.collection_name)
            search = document_collection.search(query, query_type="fts")
            if query_filter:
                search = search.where(query_filter, prefilter=True)
            docs = search.limit(top_k).to_list()
        except (ValueError, FileNotFoundError):
            docs = []
        return [self._row_to_document(doc) for doc in docs]

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id

        Args:
            ids: a single document id, or a list of document ids.

        Returns:
            The documents that were found, in the order of `ids`; ids that
            are not in the table are silently skipped. At most
            MAX_DOCS_TO_GET rows are fetched.
        """
        if not isinstance(ids, list):
            ids = [ids]

        if len(ids) == 0:
            return []

        query_filter = self._build_id_filter(ids)
        try:
            document_collection = self.db_connection.open_table(self.collection_name)
            docs = (
                document_collection.search()
                .where(query_filter)
                .limit(MAX_DOCS_TO_GET)
                .to_list()
            )
        except (ValueError, FileNotFoundError):
            docs = []

        # return the documents using the order of original
        # ids (which were ordered by score)
        doc_dict = {doc["id"]: self._row_to_document(doc) for doc in docs}
        return [doc_dict[_id] for _id in ids if _id in doc_dict]

    def delete(self, ids: Union[List[str], str], refresh_indices: bool = True):
        """Delete document by id

        Args:
            ids: a single document id, or a list of document ids. An empty
                list is a no-op.
            refresh_indices: rebuild the full-text index on the `text`
                column after the rows are deleted.
        """
        if not isinstance(ids, list):
            ids = [ids]

        if not ids:
            # an empty `id in ()` filter is not a valid expression
            return

        document_collection = self.db_connection.open_table(self.collection_name)
        document_collection.delete(self._build_id_filter(ids))

        if refresh_indices:
            document_collection.create_fts_index(
                "text",
                tokenizer_name="en_stem",
                replace=True,
            )

    def drop(self):
        """Drop the document store"""
        self.db_connection.drop_table(self.collection_name)

    def count(self) -> int:
        """Not supported by this store; always raises NotImplementedError."""
        raise NotImplementedError

    def get_all(self) -> List[Document]:
        """Not supported by this store; always raises NotImplementedError."""
        raise NotImplementedError

    def __persist_flow__(self):
        """Return the database URI and table name this store was built with."""
        return {
            "db_uri": self.db_uri,
            "collection_name": self.collection_name,
        }


================================================
FILE: libs/kotaemon/kotaemon/storages/docstores/simple_file.py
================================================
from pathlib import Path
from typing import List, Optional, Union

from kotaemon.base import Document

from .in_memory import InMemoryDocumentStore


class SimpleFileDocumentStore(InMemoryDocumentStore):
    """Improve InMemoryDocumentStore by auto saving whenever the corpus is changed"""

    def __init__(self, path: str | Path, collection_name: str = "default"):
        """Create the store and load existing data when a save file is present.

        Args:
            path: directory holding the save file; created if missing.
            collection_name: base name of the save file, stored as
                `<collection_name>.json` inside `path`.
        """
        super().__init__()
        self._path = path
        self._collection_name = collection_name

        Path(path).mkdir(parents=True, exist_ok=True)
        self._save_path = Path(path) / f"{collection_name}.json"
        if self._save_path.is_file():
            self.load(self._save_path)

    def get(self, ids: Union[List[str], str]) -> List[Document]:
        """Get document by id

        When at least one requested id is not in memory, the store is
        reloaded once from the save file before the lookup.

        Args:
            ids: a single document id, or a list of document ids.

        Raises:
            KeyError: if an id is still unknown after the reload.
        """
        if not isinstance(ids, list):
            ids = [ids]

        has_missing = any(doc_id not in self._store for doc_id in ids)
        # Only reload when the save file exists: otherwise (store never saved,
        # or dropped) a plain lookup miss would surface as FileNotFoundError
        # instead of the KeyError raised by the in-memory lookup below.
        if has_missing and self._save_path.is_file():
            self.load(self._save_path)

        return [self._store[doc_id] for doc_id in ids]

    def add(
        self,
        docs: Union[Document, List[Document]],
        ids: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Add document into document store

        Args:
            docs: list of documents to add
            ids: specify the ids of documents to add or
                use existing doc.doc_id
            exist_ok: raise error when duplicate doc-id
                found in the docstore (default to False)
        """
        super().add(docs=docs, ids=ids, **kwargs)
        # persist right away so the file always mirrors the in-memory corpus
        self.save(self._save_path)

    def delete(self, ids: Union[List[str], str]):
        """Delete document by id, then write the updated store to disk"""
        super().delete(ids=ids)
        self.save(self._save_path)

    def drop(self):
        """Drop the document store and remove its save file if it exists"""
        super().drop()
        self._save_path.unlink(missing_ok=True)

    def __persist_flow__(self):
        """Return the (serialized) path and collection name of this store."""
        from theflow.utils.modules import serialize

        return {
            "path": serialize(self._path),
            "collection_name": self._collection_name,
        }


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/__init__.py
================================================
from .base import BaseVectorStore
from .chroma import ChromaVectorStore
from .in_memory import InMemoryVectorStore
from .lancedb import LanceDBVectorStore
from .milvus import MilvusVectorStore
from .qdrant import QdrantVectorStore
from .simple_file import SimpleFileVectorStore

# Names exported by `from <this package> import *`; all of them are imported
# from the sibling modules above.
__all__ = [
    "BaseVectorStore",
    "ChromaVectorStore",
    "InMemoryVectorStore",
    "SimpleFileVectorStore",
    "LanceDBVectorStore",
    "MilvusVectorStore",
    "QdrantVectorStore",
]


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/base.py
================================================
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Optional

from llama_index.core.schema import NodeRelationship, RelatedNodeInfo
from llama_index.core.vector_stores.types import BasePydanticVectorStore
from llama_index.core.vector_stores.types import VectorStore as LIVectorStore
from llama_index.core.vector_stores.types import VectorStoreQuery

from kotaemon.base import DocumentWithEmbedding


class BaseVectorStore(ABC):
    """Abstract interface of a vector store.

    Concrete stores implement adding, deleting and querying embedding
    vectors, plus dropping the whole store.
    """

    @abstractmethod
    def __init__(self, *args, **kwargs):
        ...

    @abstractmethod
    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ) -> list[str]:
        """Add vector embeddings to vector stores

        Args:
            embeddings: List of raw embedding vectors, or list of
                DocumentWithEmbedding objects
            metadatas: List of metadata of the embeddings
            ids: List of ids of the embeddings

        Returns:
            List of ids of the embeddings
        """
        ...

    @abstractmethod
    def delete(self, ids: list[str], **kwargs):
        """Delete vector embeddings from vector stores

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: meant for vectorstore-specific parameters
        """
        ...

    @abstractmethod
    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Return the top k most similar vector embeddings

        Args:
            embedding: The query embedding (a single vector)
            top_k: Number of most similar embeddings to return
            ids: List of ids of the embeddings to be queried
            kwargs: meant for vectorstore-specific parameters

        Returns:
            the matched embeddings, the similarity scores, and the ids
        """
        ...

    @abstractmethod
    def drop(self):
        """Drop the vector store"""
        ...


class LlamaIndexVectorStore(BaseVectorStore):
    """Mixin for LlamaIndex based vectorstores

    The wrapped LlamaIndex store lives in `self._client`. Attributes whose
    name starts with an underscore are kept on the wrapper itself; any other
    attribute assignment is forwarded to the client, and any attribute not
    found on the wrapper is looked up on the client.
    """

    # LlamaIndex class to instantiate; subclasses either set it or leave it
    # falsy and override `_get_li_class`
    _li_class: type[LIVectorStore | BasePydanticVectorStore] | None

    def _get_li_class(self):
        """Return the LlamaIndex class to wrap (used when `_li_class` is falsy)."""
        raise NotImplementedError(
            "Please return the relevant LlamaIndex class in in _get_li_class"
        )

    def __init__(self, *args, **kwargs):
        """Instantiate the wrapped LlamaIndex store with the given arguments."""
        # get li_class from the method if not set
        if not self._li_class:
            LIClass = self._get_li_class()
        else:
            LIClass = self._li_class

        from dataclasses import fields

        self._client = LIClass(*args, **kwargs)

        # names of VectorStoreQuery fields that callers may pass through
        # `query(**kwargs)`; the three set explicitly by `query` are excluded
        self._vsq_kwargs = {_.name for _ in fields(VectorStoreQuery)}
        for key in ["query_embedding", "similarity_top_k", "node_ids"]:
            if key in self._vsq_kwargs:
                self._vsq_kwargs.remove(key)

    def __setattr__(self, name: str, value: Any) -> None:
        if name.startswith("_"):
            return super().__setattr__(name, value)

        return setattr(self._client, name, value)

    def __getattr__(self, name: str) -> Any:
        # __getattr__ only runs after normal lookup failed. `_li_class` and
        # `_client` must never be delegated: resolving a missing `_client`
        # through `self._client` would re-enter __getattr__ without end
        # (RecursionError) instead of raising AttributeError.
        if name in ("_li_class", "_client"):
            return super().__getattribute__(name)

        return getattr(self._client, name)

    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ):
        """Add vector embeddings to the wrapped store

        Args:
            embeddings: List of raw embedding vectors, or list of
                DocumentWithEmbedding objects
            metadatas: List of metadata, assigned to the nodes in order
            ids: List of ids, assigned to the nodes in order; each node also
                gets a SOURCE relationship pointing at its own id

        Returns:
            Whatever the wrapped store's `add` returns; an empty list when
            `embeddings` is empty.
        """
        if not embeddings:
            # nothing to add; also avoids indexing into an empty list below
            return []

        if isinstance(embeddings[0], list):
            nodes: list[DocumentWithEmbedding] = [
                DocumentWithEmbedding(embedding=embedding) for embedding in embeddings
            ]
        else:
            nodes = embeddings  # type: ignore
        if metadatas is not None:
            for node, metadata in zip(nodes, metadatas):
                node.metadata = metadata
        if ids is not None:
            for node, node_id in zip(nodes, ids):
                node.id_ = node_id
                node.relationships = {
                    NodeRelationship.SOURCE: RelatedNodeInfo(node_id=node_id)
                }

        return self._client.add(nodes=nodes)

    def delete(self, ids: list[str], **kwargs):
        """Delete embeddings one id at a time through the wrapped store

        Args:
            ids: List of ids, each passed as `ref_doc_id` to the client
            kwargs: forwarded to the client's `delete`
        """
        for id_ in ids:
            self._client.delete(ref_doc_id=id_, **kwargs)

    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Return the top k most similar vector embeddings

        Args:
            embedding: The query embedding (a single vector)
            top_k: Number of most similar embeddings to return
            ids: List of ids of the embeddings to be queried
            kwargs: extra query parameters. Depending on the name, these parameters
                will be used when constructing the VectorStoreQuery object or when
                performing querying of the underlying vector store.

        Returns:
            the matched embeddings, the similarity scores, and the ids
        """
        vsq_kwargs = {}
        vs_kwargs = {}
        for kwkey, kwvalue in kwargs.items():
            if kwkey in self._vsq_kwargs:
                vsq_kwargs[kwkey] = kwvalue
            else:
                vs_kwargs[kwkey] = kwvalue

        output = self._client.query(
            query=VectorStoreQuery(
                query_embedding=embedding,
                similarity_top_k=top_k,
                node_ids=ids,
                **vsq_kwargs,
            ),
            **vs_kwargs,
        )

        # every part of the result may be None/empty; normalize to lists
        embeddings = [node.embedding for node in output.nodes] if output.nodes else []
        similarities = output.similarities if output.similarities else []
        out_ids = output.ids if output.ids else []

        return embeddings, similarities, out_ids


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/chroma.py
================================================
from typing import Any, Dict, List, Optional, Type, cast

from llama_index.vector_stores.chroma import ChromaVectorStore as LIChromaVectorStore

from .base import LlamaIndexVectorStore


class ChromaVectorStore(LlamaIndexVectorStore):
    """Vector store wrapping the LlamaIndex Chroma integration.

    The collection is obtained from a persistent Chroma client rooted at
    `path` and handed to the LlamaIndex store.
    """

    _li_class: Type[LIChromaVectorStore] = LIChromaVectorStore

    def __init__(
        self,
        path: str = "./chroma",
        collection_name: str = "default",
        host: str = "localhost",
        port: str = "8000",
        ssl: bool = False,
        headers: Optional[Dict[str, str]] = None,
        collection_kwargs: Optional[dict] = None,
        stores_text: bool = True,
        flat_metadata: bool = True,
        **kwargs: Any,
    ):
        """Open (or create) the collection and build the wrapped store.

        Args:
            path: directory given to `chromadb.PersistentClient`.
            collection_name: collection to get or create.
            host, port, ssl, headers, collection_kwargs, stores_text,
                flat_metadata: forwarded to the LlamaIndex Chroma store
                (`headers` and `collection_kwargs` default to `{}`).
            **kwargs: extra arguments forwarded to the LlamaIndex store.

        Raises:
            ImportError: if `chromadb` is not installed.
        """
        # remember the constructor arguments for __persist_flow__
        self._path, self._collection_name = path, collection_name
        self._host, self._port, self._ssl = host, port, ssl
        self._headers = headers
        self._collection_kwargs = collection_kwargs
        self._stores_text = stores_text
        self._flat_metadata = flat_metadata
        self._kwargs = kwargs

        try:
            import chromadb
        except ImportError:
            raise ImportError(
                "ChromaVectorStore requires chromadb. "
                "Please install chromadb first `pip install chromadb`"
            )

        chroma_client = chromadb.PersistentClient(path=path)
        chroma_collection = chroma_client.get_or_create_collection(collection_name)

        # pass through for nice IDE support
        li_params = {
            "chroma_collection": chroma_collection,
            "host": host,
            "port": port,
            "ssl": ssl,
            "headers": headers or {},
            "collection_kwargs": collection_kwargs or {},
            "stores_text": stores_text,
            "flat_metadata": flat_metadata,
        }
        super().__init__(**li_params, **kwargs)
        self._client = cast(LIChromaVectorStore, self._client)

    def delete(self, ids: List[str], **kwargs):
        """Delete vector embeddings from vector stores

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: accepted but not used by this implementation
        """
        collection = self._client.client
        collection.delete(ids=ids)

    def drop(self):
        """Delete entire collection from vector stores"""
        collection = self._client.client
        collection._client.delete_collection(collection.name)

    def count(self) -> int:
        """Return the value of `count()` on the wrapped store's collection."""
        # `_collection` is not set on this wrapper, so the lookup is
        # delegated to the wrapped LlamaIndex store
        n_items = self._collection.count()
        return n_items

    def __persist_flow__(self):
        """Return the constructor arguments this store was created with."""
        params = {
            "path": self._path,
            "collection_name": self._collection_name,
            "host": self._host,
            "port": self._port,
            "ssl": self._ssl,
            "headers": self._headers,
            "collection_kwargs": self._collection_kwargs,
            "stores_text": self._stores_text,
            "flat_metadata": self._flat_metadata,
        }
        params.update(self._kwargs)
        return params


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py
================================================
"""Simple vector store index."""
from typing import Any, Optional, Type

import fsspec
from llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore
from llama_index.core.vector_stores.simple import SimpleVectorStoreData

from .base import LlamaIndexVectorStore


class InMemoryVectorStore(LlamaIndexVectorStore):
    """Vector store wrapping LlamaIndex's SimpleVectorStore."""

    _li_class: Type[LISimpleVectorStore] = LISimpleVectorStore
    store_text: bool = False

    def __init__(
        self,
        data: Optional[SimpleVectorStoreData] = None,
        fs: Optional[fsspec.AbstractFileSystem] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params.

        Args:
            data: initial store content; forwarded as-is to the wrapped store.
            fs: filesystem forwarded as-is to the wrapped store.
            **kwargs: extra arguments forwarded to the wrapped store.
        """
        # Wrapper-side copies used by __persist_flow__ and drop.
        # NOTE(review): when `data` is None this is a separate empty object,
        # not the one the wrapped store creates for itself -- confirm that
        # __persist_flow__ is only meant to report the constructor argument.
        self._data = data or SimpleVectorStoreData()
        self._fs = fs or fsspec.filesystem("file")

        super().__init__(
            data=data,
            fs=fs,
            **kwargs,
        )

    def save(
        self,
        save_path: str,
        fs: Optional[fsspec.AbstractFileSystem] = None,
        **kwargs,
    ):
        """save a simpleVectorStore to a dictionary.

        Args:
            save_path: Path of saving vector to disk.
            fs: An abstract super-class for pythonic file-systems
            **kwargs: accepted but not used by this implementation
        """
        self._client.persist(persist_path=save_path, fs=fs)

    def load(self, load_path: str, fs: Optional[fsspec.AbstractFileSystem] = None):
        """Create a SimpleKVStore from a load directory.

        The wrapped store is replaced by the one read from `load_path`.

        Args:
            load_path: Path of loading vector.
            fs: An abstract super-class for pythonic file-systems
        """
        self._client = self._client.from_persist_path(persist_path=load_path, fs=fs)

    def drop(self):
        """Clear the old data"""
        self._data = SimpleVectorStoreData()
        # The vectors live inside the wrapped store, so resetting `_data` on
        # the wrapper alone would leave every embedding queryable. Replace the
        # wrapped store with a fresh, empty one.
        self._client = self._li_class(fs=self._fs)

    def __persist_flow__(self):
        """Return the wrapper-side data as a dict tagged with its type path."""
        d = self._data.to_dict()
        d["__type__"] = f"{self._data.__module__}.{self._data.__class__.__qualname__}"
        return {
            "data": d,
            # "fs": self._fs,
        }


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/lancedb.py
================================================
from typing import Any, List, Type, cast

from llama_index.core.vector_stores.types import MetadataFilters
from llama_index.vector_stores.lancedb import LanceDBVectorStore as LILanceDBVectorStore
from llama_index.vector_stores.lancedb import base as base_lancedb

from .base import LlamaIndexVectorStore

# custom monkey patch for LanceDB
# Keep a reference to the integration's own filter builder so that
# `custom_to_lance_filter` can delegate to it.
original_to_lance_filter = base_lancedb._to_lance_filter


def custom_to_lance_filter(
    standard_filters: MetadataFilters, metadata_keys: list
) -> Any:
    """Quote string list values, then delegate to the original filter builder.

    For every filter whose value is a non-empty list starting with a string,
    each element is wrapped in single quotes before
    `original_to_lance_filter` is called.

    Args:
        standard_filters: the filters to convert. They are not modified.
        metadata_keys: passed through unchanged to the original builder.

    Returns:
        Whatever `original_to_lance_filter` returns.
    """
    import copy

    # Work on a copy: quoting in place would modify the caller's filters, and
    # a filter object used for a second query would get quoted twice.
    quoted_filters = copy.deepcopy(standard_filters)
    for metadata_filter in quoted_filters.filters:
        value = metadata_filter.value
        if isinstance(value, list):
            # quote string values if filter are list of strings
            if value and isinstance(value[0], str):
                metadata_filter.value = [f"'{v}'" for v in value]

    return original_to_lance_filter(quoted_filters, metadata_keys)


# skip table existence check
# Patches applied to the LlamaIndex LanceDB integration when this module is
# imported:
# - `_table_exists` is replaced by a function that always returns False.
#   NOTE(review): the lambda only accepts `self`; confirm the installed
#   integration never calls `_table_exists` with extra arguments.
# - `_to_lance_filter` is replaced by `custom_to_lance_filter`.
LILanceDBVectorStore._table_exists = lambda _: False
base_lancedb._to_lance_filter = custom_to_lance_filter


class LanceDBVectorStore(LlamaIndexVectorStore):
    """Vector store wrapping the LlamaIndex LanceDB integration."""

    _li_class: Type[LILanceDBVectorStore] = LILanceDBVectorStore

    def __init__(
        self,
        path: str = "./lancedb",
        collection_name: str = "default",
        **kwargs: Any,
    ):
        """Connect to LanceDB and build the wrapped store.

        Args:
            path: URI given to `lancedb.connect` and to the wrapped store.
            collection_name: table name; the table is opened when it already
                exists, otherwise `None` is handed to the wrapped store.
            **kwargs: extra arguments forwarded to the wrapped store.

        Raises:
            ImportError: if `lancedb` is not installed.
        """
        self._path = path
        self._collection_name = collection_name

        try:
            import lancedb
        except ImportError as exc:
            raise ImportError(
                "Please install lancedb: 'pip install lancedb tantivy'"
            ) from exc

        db_connection = lancedb.connect(path)  # type: ignore
        try:
            table = db_connection.open_table(collection_name)
        except (ValueError, FileNotFoundError):
            # same exceptions the LanceDB document store treats as
            # "table does not exist yet"
            table = None

        self._kwargs = kwargs

        # pass through for nice IDE support
        super().__init__(
            uri=path,
            table_name=collection_name,
            table=table,
            **kwargs,
        )
        self._client = cast(LILanceDBVectorStore, self._client)
        self._client._metadata_keys = ["file_id"]

    def delete(self, ids: List[str], **kwargs):
        """Delete vector embeddings from vector stores

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: accepted but not used by this implementation
        """
        self._client.delete_nodes(ids)

    def drop(self):
        """Delete entire collection from vector stores"""
        # Use the wrapper's own `_collection_name`: the public spelling is not
        # set on the wrapper and would be looked up on the wrapped store,
        # which is constructed with `table_name` instead.
        self._client.client.drop_table(self._collection_name)

    def count(self) -> int:
        """Not supported by this store; always raises NotImplementedError."""
        raise NotImplementedError

    def __persist_flow__(self):
        """Return the path and collection name this store was created with."""
        return {
            "path": self._path,
            "collection_name": self._collection_name,
        }


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/milvus.py
================================================
import os
from typing import Any, Optional, cast

from kotaemon.base import DocumentWithEmbedding

from .base import LlamaIndexVectorStore


class MilvusVectorStore(LlamaIndexVectorStore):
    """Vector store wrapping the LlamaIndex Milvus integration.

    The wrapped store is built lazily (see `_lazy_init`) because its
    constructor takes the vector dimension, which is only known once the
    first embedding is seen.
    """

    _li_class = None

    def _get_li_class(self):
        """Import and return the LlamaIndex Milvus store class.

        Raises:
            ImportError: if the Milvus integration package is not installed.
        """
        try:
            from llama_index.vector_stores.milvus import (
                MilvusVectorStore as LIMilvusVectorStore,
            )
        except ImportError as exc:
            raise ImportError(
                "Please install missing package: "
                "'pip install llama-index-vector-stores-milvus'"
            ) from exc

        return LIMilvusVectorStore

    def __init__(
        self,
        uri: str = "./milvus.db",  # or "http://localhost:19530"
        collection_name: str = "default",
        token: Optional[str] = None,
        **kwargs: Any,
    ):
        """Record the connection parameters; no client is created here.

        Args:
            uri: database file name or server URL.
            collection_name: name of the collection.
            token: optional token forwarded to the wrapped store.
            **kwargs: extra arguments forwarded to the wrapped store. An
                optional `path` entry names a directory that `uri` is
                resolved against (see `_lazy_init`).
        """
        self._uri = uri
        self._collection_name = collection_name
        self._token = token
        self._kwargs = kwargs
        self._path = kwargs.get("path", None)
        self._inited = False

    def _lazy_init(self, dim: Optional[int] = None):
        """
        Lazy init the client.
        Because the LlamaIndex init method requires the dim parameter,
        we need to try to get the dim from the first embedding.

        Args:
            dim: Dimension of the vectors.
        """
        if not self._inited:
            # `path` is optional: guard against None before asking the OS
            # whether it is a directory (os.path.isdir(None) raises TypeError)
            use_local_dir = (
                bool(self._path)
                and os.path.isdir(self._path)
                and not self._uri.startswith("http")
            )
            uri = os.path.join(self._path, self._uri) if use_local_dir else self._uri
            super().__init__(
                uri=uri,
                token=self._token,
                collection_name=self._collection_name,
                dim=dim,
                **self._kwargs,
            )
            from llama_index.vector_stores.milvus import (
                MilvusVectorStore as LIMilvusVectorStore,
            )

            self._client = cast(LIMilvusVectorStore, self._client)
        self._inited = True

    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ):
        """Add embeddings, creating the client from the first one if needed.

        Args:
            embeddings: List of raw embedding vectors, or list of
                DocumentWithEmbedding objects
            metadatas: List of metadata of the embeddings
            ids: List of ids of the embeddings

        Returns:
            The result of the parent `add`; an empty list when `embeddings`
            is empty.
        """
        if not embeddings:
            # nothing to add, and no first element to infer the dimension from
            return []

        if not self._inited:
            if isinstance(embeddings[0], list):
                dim = len(embeddings[0])
            else:
                dim = len(embeddings[0].embedding)
            self._lazy_init(dim)

        return super().add(embeddings=embeddings, metadatas=metadatas, ids=ids)

    def query(
        self,
        embedding: list[float],
        top_k: int = 1,
        ids: Optional[list[str]] = None,
        **kwargs,
    ) -> tuple[list[list[float]], list[float], list[str]]:
        """Query the store; the client is created with the query's dimension."""
        self._lazy_init(len(embedding))

        return super().query(embedding=embedding, top_k=top_k, ids=ids, **kwargs)

    def delete(self, ids: list[str], **kwargs):
        """Delete embeddings by id, creating the client first if needed."""
        self._lazy_init()
        super().delete(ids=ids, **kwargs)

    def drop(self):
        """Drop the collection, creating the client first if needed."""
        # `_client` only exists after initialisation; make sure it is there
        # before using it, like `delete` does.
        self._lazy_init()
        self._client.client.drop_collection(self._collection_name)

    def count(self) -> int:
        """Return the number of entities in the collection.

        Returns 0 when the client cannot be initialised.
        """
        try:
            self._lazy_init()
        except Exception:
            # best effort: an unusable store is reported as empty, but
            # KeyboardInterrupt/SystemExit are no longer swallowed
            return 0
        return self._client.client.query(
            collection_name=self._collection_name, output_fields=["count(*)"]
        )[0]["count(*)"]

    def __persist_flow__(self):
        """Return the constructor arguments this store was created with."""
        return {
            "uri": self._uri,
            "collection_name": self._collection_name,
            "token": self._token,
            **self._kwargs,
        }


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/qdrant.py
================================================
from typing import Any, List, Optional, cast

from .base import LlamaIndexVectorStore


class QdrantVectorStore(LlamaIndexVectorStore):
    """Vector store wrapping the LlamaIndex Qdrant integration."""

    _li_class = None

    def _get_li_class(self):
        """Import and return the LlamaIndex Qdrant store class.

        Raises:
            ImportError: if the Qdrant integration package is not installed.
        """
        try:
            from llama_index.vector_stores.qdrant import (
                QdrantVectorStore as LIQdrantVectorStore,
            )
        except ImportError:
            raise ImportError(
                "Please install missing package: "
                "'pip install llama-index-vector-stores-qdrant'"
            )
        else:
            return LIQdrantVectorStore

    def __init__(
        self,
        collection_name,
        url: Optional[str] = None,
        api_key: Optional[str] = None,
        client_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ):
        """Build the wrapped Qdrant store.

        Args:
            collection_name: name of the collection.
            url, api_key, client_kwargs: forwarded unchanged to the
                wrapped store.
            **kwargs: extra arguments forwarded to the wrapped store.
        """
        # remember the constructor arguments for __persist_flow__
        self._collection_name = collection_name
        self._url, self._api_key = url, api_key
        self._client_kwargs = client_kwargs
        self._kwargs = kwargs

        li_params = {
            "collection_name": collection_name,
            "url": url,
            "api_key": api_key,
            "client_kwargs": client_kwargs,
        }
        super().__init__(**li_params, **kwargs)

        from llama_index.vector_stores.qdrant import (
            QdrantVectorStore as LIQdrantVectorStore,
        )

        self._client = cast(LIQdrantVectorStore, self._client)

    def delete(self, ids: List[str], **kwargs):
        """Delete vector embeddings from vector stores

        Args:
            ids: List of ids of the embeddings to be deleted
            kwargs: forwarded to the Qdrant client's `delete`
        """
        from qdrant_client import models

        qdrant = self._client.client
        selector = models.PointIdsList(points=ids)
        qdrant.delete(
            collection_name=self._collection_name,
            points_selector=selector,
            **kwargs,
        )

    def drop(self):
        """Delete entire collection from vector stores"""
        qdrant = self._client.client
        qdrant.delete_collection(self._collection_name)

    def count(self) -> int:
        """Return the exact number of points in the collection."""
        result = self._client.client.count(
            collection_name=self._collection_name, exact=True
        )
        return result.count

    def __persist_flow__(self):
        """Return the constructor arguments this store was created with."""
        params = {
            "collection_name": self._collection_name,
            "url": self._url,
            "api_key": self._api_key,
            "client_kwargs": self._client_kwargs,
        }
        params.update(self._kwargs)
        return params


================================================
FILE: libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py
================================================
"""Simple file vector store index."""
from pathlib import Path
from typing import Any, Optional, Type

import fsspec
from llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore
from llama_index.core.vector_stores.simple import SimpleVectorStoreData

from kotaemon.base import DocumentWithEmbedding

from .base import LlamaIndexVectorStore


class SimpleFileVectorStore(LlamaIndexVectorStore):
    """Similar to InMemoryVectorStore but is backed by file by default"""

    _li_class: Type[LISimpleVectorStore] = LISimpleVectorStore
    store_text: bool = False

    def __init__(
        self,
        path: str | Path,
        collection_name: str = "default",
        data: Optional[SimpleVectorStoreData] = None,
        fs: Optional[fsspec.AbstractFileSystem] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params.

        Args:
            path: directory of the save file.
            collection_name: name of the save file inside `path`.
            data: initial store content; forwarded as-is to the wrapped store.
            fs: filesystem forwarded to the wrapped store; a local
                filesystem is used for persistence when omitted.
            **kwargs: extra arguments forwarded to the wrapped store.
        """
        self._data = data or SimpleVectorStoreData()
        self._fs = fs or fsspec.filesystem("file")
        self._collection_name = collection_name
        self._path = path
        self._save_path = Path(path) / collection_name

        super().__init__(
            data=data,
            fs=fs,
            **kwargs,
        )

        # an existing save file takes precedence over the `data` argument
        if self._save_path.is_file():
            self._client = self._li_class.from_persist_path(
                persist_path=str(self._save_path), fs=self._fs
            )

    def add(
        self,
        embeddings: list[list[float]] | list[DocumentWithEmbedding],
        metadatas: Optional[list[dict]] = None,
        ids: Optional[list[str]] = None,
    ):
        """Add embeddings, then persist the wrapped store to the save file."""
        r = super().add(embeddings, metadatas, ids)
        self._client.persist(str(self._save_path), self._fs)
        return r

    def delete(self, ids: list[str], **kwargs):
        """Delete embeddings, then persist the wrapped store to the save file."""
        r = super().delete(ids, **kwargs)
        self._client.persist(str(self._save_path), self._fs)
        return r

    def drop(self):
        """Clear all vectors and remove the save file if it exists."""
        self._data = SimpleVectorStoreData()
        # The vectors live inside the wrapped store. Without replacing it, the
        # next add/delete would persist all the "dropped" vectors again.
        self._client = self._li_class(fs=self._fs)
        self._save_path.unlink(missing_ok=True)

    def __persist_flow__(self):
        """Return the wrapper-side data, collection name and path."""
        d = self._data.to_dict()
        d["__type__"] = f"{self._data.__module__}.{self._data.__class__.__qualname__}"
        return {
            "data": d,
            "collection_name": self._collection_name,
            "path": str(self._path),
            # "fs": self._fs,
        }


================================================
FILE: libs/kotaemon/pyproject.toml
================================================
# build backend and build dependencies
[build-system]
requires = ["setuptools >= 61.0", "wheel", "setuptools-git-versioning>=2.0,<3"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
include-package-data = false
packages.find.include = ["kotaemon*"]
packages.find.exclude = ["tests*", "env*"]

[tool.setuptools-git-versioning]
enabled = true
dev_template = "{tag}"
dirty_template = "{tag}"
tag_filter = "v?\\d+(\\.\\d+)*.*"

# metadata and dependencies
[project]
name = "kotaemon"
dynamic = ["version"]
requires-python = ">= 3.10"
description = "Kotaemon core library for AI development."
dependencies = [
    "azure-ai-documentintelligence",
    "beautifulsoup4>=4.12.3,<4.13",
    "click>=8.1.7,<9",
    "cohere>=5.3.2,<6",
    "cookiecutter>=2.6.0,<2.7",
    "fast_langdetect",
    "fastapi<=0.112.1",
    "gradio>=4.31.0,<5",
    "html2text==2024.2.26",
    "langchain<2",
    "langchain-community<1",
    "langchain-openai<2",
    "langchain-google-genai<5",
    "langchain-anthropic<2",
    "langchain-ollama<2",
    "langchain-mistralai<2",
    "langchain-cohere<1",
    "llama-hub>=0.0.79,<0.1.0",
    "llama-index>=0.10.40,<0.11.0",
    "chromadb<=0.5.16",
    "llama-index-vector-stores-chroma>=0.1.9",
    "llama-index-vector-stores-lancedb",
    "openai>=1.23.6,<2",
    "matplotlib",
    "matplotlib-inline",
    "openpyxl>=3.1.2,<3.2",
    "opentelemetry-exporter-otlp-proto-grpc>=1.25.0", # https://github.com/chroma-core/chroma/issues/2571
    "pandas>=2.2.2,<2.3",
    "plotly<6.0.0",
    "PyMuPDF>=1.23,<=1.24.11",
    "pypdf>=4.2.0,<4.3",
    "pylance",
    "python-decouple", # for theflow
    "python-docx>=1.1.0,<1.2",
    "python-dotenv>=1.0.1,<1.1",
    "tenacity>=8.2.3,<8.3",
    "theflow>=0.8.6,<0.9.0",
    "trogon>=0.5.0,<0.6",
    "umap-learn==0.5.5",
    "tavily-python>=0.4.0",
    "pydantic<=2.10.6",
]
readme = "README.md"
authors = [
    { name = "@trducng", email = "john@cinnamon.is" },
    { name = "@lone17", email = "ian@cinnamon.is" },
    { name = "@taprosoft", email = "tadashi@cinnamon.is" },
    { name = "@cin-albert", email = "albert@cinnamon.is" },
]
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]

[project.optional-dependencies]
adv = [
    "duckduckgo-search>=6.1.0,<6.2",
    "elasticsearch>=8.13.0,<8.14",
    "fastembed",
    "onnxruntime<v1.20",
    "googlesearch-python>=1.2.4,<1.3",
    "llama-cpp-python<0.2.8",
    "llama-index>=0.10.40,<0.11.0",
    "llama-index-vector-stores-milvus",
    "llama-index-vector-stores-qdrant",
    "mcp[cli]>=1.0.0",
    "sentence-transformers",
    "tabulate",
    "unstructured>=0.15.8,<0.16",
    "wikipedia>=1.4.0,<1.5",
    "voyageai>=0.3.0",
]
dev = [
    "black",
    "coverage",
    "flake8",
    "ipython",
    "pre-commit",
    "pytest",
    "pytest-mock",
    "sphinx",
]
all = ["kotaemon[adv,dev]"]

[project.scripts]
kotaemon = "kotaemon.cli:main"


================================================
FILE: libs/kotaemon/pytest.ini
================================================
[pytest]
minversion = 7.4.0
testpaths = tests
addopts = -ra -q
log_cli=true
log_level=WARNING
log_format = %(asctime)s %(levelname)s %(message)s
log_date_format = %Y-%m-%d %H:%M:%S
log_file = logs/pytest-logs.txt


================================================
FILE: libs/kotaemon/tests/__init__.py
================================================


================================================
FILE: libs/kotaemon/tests/_test_multimodal_reader.py
================================================
# TODO: This test is broken and should be rewritten
from pathlib import Path

from kotaemon.loaders import AdobeReader

# from dotenv import load_dotenv


input_file = Path(__file__).parent / "resources" / "multimodal.pdf"

# load_dotenv()


def test_adobe_reader():
    """Load the multimodal PDF with AdobeReader and check extracted items.

    The test expects exactly two documents whose ``type`` metadata is
    ``"table"`` and exactly two whose ``type`` metadata is ``"image"``.
    """
    documents = AdobeReader().load_data(input_file)

    def docs_of_type(kind):
        # A missing "type" key falls back to "" and therefore never matches.
        return [doc for doc in documents if doc.metadata.get("type", "") == kind]

    assert len(docs_of_type("table")) == 2
    assert len(docs_of_type("image")) == 2


================================================
FILE: libs/kotaemon/tests/conftest.py
================================================
import pytest


@pytest.fixture(scope="function")
def mock_google_search(monkeypatch):
    """Patch ``googlesearch.search`` with a stub that yields one fixed result.

    The stub ignores every positional and keyword argument it receives and
    is a generator producing a single ``googlesearch.SearchResult``.
    """
    import googlesearch

    def fake_search(*_args, **_kwargs):
        canned_fields = {
            "url": "https://www.cinnamon.is/en/",
            "title": "Cinnamon AI",
            "description": "Cinnamon AI is an enterprise AI company.",
        }
        yield googlesearch.SearchResult(**canned_fields)

    # monkeypatch undoes the replacement automatically when the test ends.
    monkeypatch.setattr(googlesearch, "search", fake_search)


def if_haystack_not_installed():
    """Return True when ``import haystack`` raises ImportError, else False."""
    missing = False
    try:
        import haystack  # noqa: F401
    except ImportError:
        missing = True
    return missing


def if_sentence_bert_not_installed():
    """Return True when ``import sentence_transformers`` raises ImportError."""
    missing = False
    try:
        import sentence_transformers  # noqa: F401
    except ImportError:
        missing = True
    return missing


def if_sentence_fastembed_not_installed():
    """Return True when ``import fastembed`` raises ImportError, else False."""
    missing = False
    try:
        import fastembed  # noqa: F401
    except ImportError:
        missing = True
    return missing


def if_unstructured_pdf_not_installed():
    """Return True when unstructured's PDF partitioner cannot be imported.

    Both ``import unstructured`` and
    ``from unstructured.partition.pdf import partition_pdf`` must succeed for
    this to return False; an ImportError from either one yields True.
    """
    missing = False
    try:
        import unstructured  # noqa: F401
        from unstructured.partition.pdf import partition_pdf  # noqa: F401
    except ImportError:
        missing = True
    return missing


def if_cohere_not_installed():
    """Return True when ``import cohere`` raises ImportError, else False."""
    missing = False
    try:
        import cohere  # noqa: F401
    except ImportError:
        missing = True
    return missing


def if_llama_cpp_not_installed():
    """Return True when ``import llama_cpp`` raises ImportError, else False."""
    missing = False
    try:
        import llama_cpp  # noqa: F401
    except ImportError:
        missing = True
    return missing


def if_voyageai_not_installed():
    """Return True when ``import voyageai`` raises ImportError, else False."""
    missing = False
    try:
        import voyageai  # noqa: F401
    except ImportError:
        missing = True
    return missing


# Reusable ``skipif`` markers for tests that need optional dependencies.
# Each availability check below is called once, when this module is imported,
# and its boolean result is passed to ``pytest.mark.skipif`` as the condition.

# Skip when ``haystack`` cannot be imported.
skip_when_haystack_not_installed = pytest.mark.skipif(
    if_haystack_not_installed(), reason="Haystack is not installed"
)

# Skip when ``sentence_transformers`` cannot be imported.
skip_when_sentence_bert_not_installed = pytest.mark.skipif(
    if_sentence_bert_not_installed(), reason="SBert is not installed"
)

# Skip when ``fastembed`` cannot be imported.
skip_when_fastembed_not_installed = pytest.mark.skipif(
    if_sentence_fastembed_not_installed(), reason="fastembed is not installed"
)

# Skip when ``unstructured`` or its ``partition_pdf`` entry point cannot be
# imported.
skip_when_unstructured_pdf_not_installed = pytest.mark.skipif(
    if_unstructured_pdf_not_installed(), reason="unstructured is not installed"
)

# Skip when ``cohere`` cannot be imported.
skip_when_cohere_not_installed = pytest.mark.skipif(
    if_cohere_not_installed(), reason="cohere is not installed"
)

# Unconditional skip: the condition is hard-coded to True, so any test carrying
# this marker never runs.
skip_openai_lc_wrapper_test = pytest.mark.skipif(
    True, reason="OpenAI LC wrapper test is skipped"
)

# Skip when ``llama_cpp`` cannot be imported.
skip_llama_cpp_not_installed = pytest.mark.skipif(
    if_llama_cpp_not_installed(), reason="llama_cpp is not installed"
)

# Skip when ``voyageai`` cannot be imported.
skip_when_voyageai_not_installed = pytest.mark.skipif(
    if_voyageai_not_installed(), reason="voyageai is not installed"
)


================================================
FILE: libs/kotaemon/tests/resources/dummy.mhtml
================================================
MIME-Version: 1.0
Content-Type: multipart/related; boundary="----=_NextPart_01CF5AE5.5C24CD00"

This document is a Single File Web Page, also known as a Web Archive file.  If you are seeing this message, your browser or editor doesn't support Web Archive files.  Please download a browser that supports Web Archive, such as Windows® Internet Explorer®.

------=_NextPart_01CF5AE5.5C24CD00
Content-Location: file:///C:/D16BB227/testing.htm
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html; charset="us-ascii"

<html xmlns:v=3D"urn:schemas-microsoft-com:vml"
xmlns:o=3D"urn:schemas-microsoft-com:office:office"
xmlns:w=3D"urn:schemas-microsoft-com:office:word"
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
xmlns=3D"http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=3DContent-Type content=3D"text/html; charset=3Dus-ascii">
<meta name=3DProgId content=3DWord.Document>
<meta name=3DGenerator content=3D"Microsoft Word 12">
<meta name=3DOriginator content=3D"Microsoft Word 12">
<link rel=3DFile-List href=3D"testing_files/filelist.xml">
<!--[if gte mso 9]><xml>
 <o:DocumentProperties>
  <o:Author>dtobias</o:Author>
  <o:Template>testing.mht</o:Template>
  <o:LastAuthor>dtobias</o:LastAuthor>
  <o:Revision>2</o:Revision>
  <o:TotalTime>1</o:TotalTime>
  <o:LastPrinted>2014-04-18T13:05:00Z</o:LastPrinted>
  <o:Created>2014-04-18T13:05:00Z</o:Created>
  <o:LastSaved>2014-04-18T13:05:00Z</o:LastSaved>
  <o:Pages>1</o:Pages>
  <o:Words>49</o:Words>
  <o:Characters>280</o:Characters>
  <o:Company>Microsoft</o:Company>
  <o:Lines>2</o:Lines>
  <o:Paragraphs>1</o:Paragraphs>
  <o:CharactersWithSpaces>328</o:CharactersWithSpaces>
  <o:Version>12.00</o:Version>
 </o:DocumentProperties>
</xml><![endif]-->
<link rel=3DthemeData href=3D"testing_files/themedata.thmx">
<link rel=3DcolorSchemeMapping href=3D"testing_files/colorschememapping.xml=
">
<!--[if gte mso 9]><xml>
 <w:WordDocument>
  <w:SpellingState>Clean</w:SpellingState>
  <w:TrackMoves>false</w:TrackMoves>
  <w:TrackFormatting/>
  <w:PunctuationKerning/>
  <w:ValidateAgainstSchemas/>
  <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
  <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
  <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
  <w:DoNotPromoteQF/>
  <w:LidThemeOther>EN-US</w:LidThemeOther>
  <w:LidThemeAsian>X-NONE</w:LidThemeAsian>
  <w:LidThemeComplexScript>X-NONE</w:LidThemeComplexScript>
  <w:Compatibility>
   <w:BreakWrappedTables/>
   <w:SnapToGridInCell/>
   <w:WrapTextWithPunct/>
   <w:UseAsianBreakRules/>
   <w:DontGrowAutofit/>
   <w:SplitPgBreakAndParaMark/>
   <w:DontVertAlignCellWithSp/>
   <w:DontBreakConstrainedForcedTables/>
   <w:DontVertAlignInTxbx/>
   <w:Word11KerningPairs/>
   <w:CachedColBalance/>
  </w:Compatibility>
  <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
  <m:mathPr>
   <m:mathFont m:val=3D"Cambria Math"/>
   <m:brkBin m:val=3D"before"/>
   <m:brkBinSub m:val=3D"&#45;-"/>
   <m:smallFrac m:val=3D"off"/>
   <m:dispDef/>
   <m:lMargin m:val=3D"0"/>
   <m:rMargin m:val=3D"0"/>
   <m:defJc m:val=3D"centerGroup"/>
   <m:wrapIndent m:val=3D"1440"/>
   <m:intLim m:val=3D"subSup"/>
   <m:naryLim m:val=3D"undOvr"/>
  </m:mathPr></w:WordDocument>
</xml><![endif]--><!--[if gte mso 9]><xml>
 <w:LatentStyles DefLockedState=3D"false" DefUnhideWhenUsed=3D"true"
  DefSemiHidden=3D"true" DefQFormat=3D"false" DefPriority=3D"99"
  LatentStyleCount=3D"267">
  <w:LsdException Locked=3D"false" Priority=3D"0" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Normal"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"heading 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 7"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 8"/>
  <w:LsdException Locked=3D"false" Priority=3D"9" QFormat=3D"true" Name=3D"=
heading 9"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 7"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 8"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" Name=3D"toc 9"/>
  <w:LsdException Locked=3D"false" Priority=3D"35" QFormat=3D"true" Name=3D=
"caption"/>
  <w:LsdException Locked=3D"false" Priority=3D"10" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Title"/>
  <w:LsdException Locked=3D"false" Priority=3D"1" Name=3D"Default Paragraph=
 Font"/>
  <w:LsdException Locked=3D"false" Priority=3D"11" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Subtitle"/>
  <w:LsdException Locked=3D"false" Priority=3D"22" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Strong"/>
  <w:LsdException Locked=3D"false" Priority=3D"20" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Emphasis"/>
  <w:LsdException Locked=3D"false" Priority=3D"59" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Table Grid"/>
  <w:LsdException Locked=3D"false" UnhideWhenUsed=3D"false" Name=3D"Placeho=
lder Text"/>
  <w:LsdException Locked=3D"false" Priority=3D"1" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"No Spacing"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 1"/>
  <w:LsdException Locked=3D"false" UnhideWhenUsed=3D"false" Name=3D"Revisio=
n"/>
  <w:LsdException Locked=3D"false" Priority=3D"34" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"List Paragraph"/>
  <w:LsdException Locked=3D"false" Priority=3D"29" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Quote"/>
  <w:LsdException Locked=3D"false" Priority=3D"30" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Intense Quote"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 1"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 2"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 3"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 4"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 5"/>
  <w:LsdException Locked=3D"false" Priority=3D"60" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Shading Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"61" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light List Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"62" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Light Grid Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"63" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 1 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"64" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Shading 2 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"65" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 1 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"66" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium List 2 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"67" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 1 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"68" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 2 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"69" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Medium Grid 3 Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"70" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Dark List Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"71" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Shading Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"72" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful List Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"73" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" Name=3D"Colorful Grid Accent 6"/>
  <w:LsdException Locked=3D"false" Priority=3D"19" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Subtle Emphasis"/>
  <w:LsdException Locked=3D"false" Priority=3D"21" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Intense Emphasis"/>
  <w:LsdException Locked=3D"false" Priority=3D"31" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Subtle Reference"/>
  <w:LsdException Locked=3D"false" Priority=3D"32" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Intense Reference"/>
  <w:LsdException Locked=3D"false" Priority=3D"33" SemiHidden=3D"false"
   UnhideWhenUsed=3D"false" QFormat=3D"true" Name=3D"Book Title"/>
  <w:LsdException Locked=3D"false" Priority=3D"37" Name=3D"Bibliography"/>
  <w:LsdException Locked=3D"false" Priority=3D"39" QFormat=3D"true" Name=3D=
"TOC Heading"/>
 </w:LatentStyles>
</xml><![endif]-->
<style>
<!--
 /* Font Definitions */
 @font-face
	{font-family:"Cambria Math";
	panose-1:2 4 5 3 5 4 6 3 2 4;
	mso-font-charset:1;
	mso-generic-font-family:roman;
	mso-font-format:other;
	mso-font-pitch:variable;
	mso-font-signature:0 0 0 0 0 0;}
@font-face
	{font-family:Cambria;
	panose-1:2 4 5 3 5 4 6 3 2 4;
	mso-font-charset:0;
	mso-generic-font-family:roman;
	mso-font-pitch:variable;
	mso-font-signature:-536870145 1073743103 0 0 415 0;}
@font-face
	{font-family:Calibri;
	panose-1:2 15 5 2 2 2 4 3 2 4;
	mso-font-charset:0;
	mso-generic-font-family:swiss;
	mso-font-pitch:variable;
	mso-font-signature:-536870145 1073786111 1 0 415 0;}
 /* Style Definitions */
 p.MsoNormal, li.MsoNormal, div.MsoNormal
	{mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-parent:"";
	margin-top:0in;
	margin-right:0in;
	margin-bottom:10.0pt;
	margin-left:0in;
	line-height:115%;
	mso-pagination:widow-orphan;
	font-size:11.0pt;
	font-family:"Calibri","sans-serif";
	mso-fareast-font-family:Calibri;
	mso-bidi-font-family:"Times New Roman";}
p.MsoTitle, li.MsoTitle, div.MsoTitle
	{mso-style-priority:10;
	mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-link:"Title Char";
	mso-style-next:Normal;
	margin-top:0in;
	margin-right:0in;
	margin-bottom:15.0pt;
	margin-left:0in;
	mso-add-space:auto;
	mso-pagination:widow-orphan;
	border:none;
	mso-border-bottom-alt:solid #4F81BD 1.0pt;
	padding:0in;
	mso-padding-alt:0in 0in 4.0pt 0in;
	font-size:26.0pt;
	font-family:"Cambria","serif";
	mso-fareast-font-family:"Times New Roman";
	mso-bidi-font-family:"Times New Roman";
	color:#17365D;
	letter-spacing:.25pt;
	mso-font-kerning:14.0pt;}
p.MsoTitleCxSpFirst, li.MsoTitleCxSpFirst, div.MsoTitleCxSpFirst
	{mso-style-priority:10;
	mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-link:"Title Char";
	mso-style-next:Normal;
	mso-style-type:export-only;
	margin:0in;
	margin-bottom:.0001pt;
	mso-add-space:auto;
	mso-pagination:widow-orphan;
	border:none;
	mso-border-bottom-alt:solid #4F81BD 1.0pt;
	padding:0in;
	mso-padding-alt:0in 0in 4.0pt 0in;
	font-size:26.0pt;
	font-family:"Cambria","serif";
	mso-fareast-font-family:"Times New Roman";
	mso-bidi-font-family:"Times New Roman";
	color:#17365D;
	letter-spacing:.25pt;
	mso-font-kerning:14.0pt;}
p.MsoTitleCxSpMiddle, li.MsoTitleCxSpMiddle, div.MsoTitleCxSpMiddle
	{mso-style-priority:10;
	mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-link:"Title Char";
	mso-style-next:Normal;
	mso-style-type:export-only;
	margin:0in;
	margin-bottom:.0001pt;
	mso-add-space:auto;
	mso-pagination:widow-orphan;
	border:none;
	mso-border-bottom-alt:solid #4F81BD 1.0pt;
	padding:0in;
	mso-padding-alt:0in 0in 4.0pt 0in;
	font-size:26.0pt;
	font-family:"Cambria","serif";
	mso-fareast-font-family:"Times New Roman";
	mso-bidi-font-family:"Times New Roman";
	color:#17365D;
	letter-spacing:.25pt;
	mso-font-kerning:14.0pt;}
p.MsoTitleCxSpLast, li.MsoTitleCxSpLast, div.MsoTitleCxSpLast
	{mso-style-priority:10;
	mso-style-unhide:no;
	mso-style-qformat:yes;
	mso-style-link:"Title Char";
	mso-style-next:Normal;
	mso-style-type:export-only;
	margin-top:0in;
	margin-right:0in;
	margin-bottom:15.0pt;
	margin-left:0in;
	mso-add-space:auto;
	mso-pagination:widow-orphan;
	border:none;
	mso-border-bottom-alt:solid #4F81BD 1.0pt;
	padding:0in;
	mso-padding-alt:0in 0in 4.0pt 0in;
	font-size:26.0pt;
	font-family:"Cambria","serif";
	mso-fareast-font-family:"Times New Roman";
	mso-bidi-font-family:"Times New Roman";
	color:#17365D;
	letter-spacing:.25pt;
	mso-font-kerning:14.0pt;}
span.TitleChar
	{mso-style-name:"Title Char";
	mso-style-priority:10;
	mso-style-unhide:no;
	mso-style-locked:yes;
	mso-style-link:Title;
	mso-ansi-font-size:26.0pt;
	mso-bidi-font-size:26.0pt;
	font-family:"Cambria","serif";
	mso-ascii-font-family:Cambria;
	mso-fareast-font-family:"Times New Roman";
	mso-hansi-font-family:Cambria;
	mso-bidi-font-family:"Times New Roman";
	color:#17365D;
	letter-spacing:.25pt;
	mso-font-kerning:14.0pt;}
span.SpellE
	{mso-style-name:"";
	mso-spl-e:yes;}
.MsoChpDefault
	{mso-style-type:export-only;
	mso-default-props:yes;
	font-size:10.0pt;
	mso-ansi-font-size:10.0pt;
	mso-bidi-font-size:10.0pt;
	mso-ascii-font-family:Calibri;
	mso-fareast-font-family:Calibri;
	mso-hansi-font-family:Calibri;}
@page WordSection1
	{size:8.5in 11.0in;
	margin:1.0in 1.0in 1.0in 1.0in;
	mso-header-margin:.5in;
	mso-footer-margin:.5in;
	mso-paper-source:0;}
div.WordSection1
	{page:WordSection1;}
-->
</style>
<!--[if gte mso 10]>
<style>
 /* Style Definitions */
 table.MsoNormalTable
	{mso-style-name:"Table Normal";
	mso-tstyle-rowband-size:0;
	mso-tstyle-colband-size:0;
	mso-style-noshow:yes;
	mso-style-priority:99;
	mso-style-qformat:yes;
	mso-style-parent:"";
	mso-padding-alt:0in 5.4pt 0in 5.4pt;
	mso-para-margin:0in;
	mso-para-margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	font-size:10.0pt;
	font-family:"Calibri","sans-serif";}
</style>
<![endif]--><!--[if gte mso 9]><xml>
 <o:shapedefaults v:ext=3D"edit" spidmax=3D"2050"/>
</xml><![endif]--><!--[if gte mso 9]><xml>
 <o:shapelayout v:ext=3D"edit">
  <o:idmap v:ext=3D"edit" data=3D"1"/>
 </o:shapelayout></xml><![endif]-->
</head>

<body lang=3DEN-US style=3D'tab-interval:.5in'>

<div class=3DWordSection1>

<div style=3D'mso-element:para-border-div;border:none;border-bottom:solid #=
4F81BD 1.0pt;
padding:0in 0in 4.0pt 0in'>

<p class=3DMsoTitle>This is a test.</p>

</div>

<p class=3DMsoNormal>This is <b style=3D'mso-bidi-font-weight:normal'>bold,=
 <i
style=3D'mso-bidi-font-style:normal'>italic, and <u>underlined.</u></i></b>=
</p>

<p class=3DMsoNormal><span class=3DSpellE>asdakl</span> <span class=3DSpell=
E>fskljf</span>
<span class=3DSpellE>sklf</span> <span class=3DSpellE>jkslaf</span>; <span
class=3DSpellE>djks</span> <span class=3DSpellE>dlkfa</span> <span class=3D=
SpellE>sk</span>
<span class=3DSpellE>sdjkl</span> <span class=3DSpellE>ksjkl</span> <span
class=3DSpellE>jsjk</span> <span class=3DSpellE>skdjjks</span> <span class=
=3DSpellE>i</span>
w <span class=3DSpellE>ie</span> <span class=3DSpellE>sjkfksd</span> <span
class=3DSpellE>fjisdf</span> <span class=3DSpellE>jks</span> <span class=3D=
SpellE>fjs</span>
<span class=3DSpellE>kdj</span> <span class=3DSpellE>fsk</span> <span class=
=3DSpellE>dfjskd</span>
<span class=3DSpellE>fjskd</span> <span class=3DSpellE>fjsd</span> <span
class=3DSpellE>kfjsk</span> f <span class=3DSpellE>jskdf</span> <span class=
=3DSpellE>jskd</span>
<span class=3DSpellE>fjsk</span> <span class=3DSpellE>dfjskdf</span> <span
class=3DSpellE>jsifj</span> <span class=3DSpellE>sifj</span> <span class=3D=
SpellE>sk</span>
<span class=3DSpellE>fjks</span> <span class=3DSpellE>fjksd</span> <span
class=3DSpellE>fjskdf</span> <span class=3DSpellE>kjs</span> <span class=3D=
SpellE>jdfksk</span>
<span class=3DSpellE>fdjs</span> <span class=3DSpellE>fksj</span> <span
class=3DSpellE>fks</span> <span class=3DSpellE>dfjs</span> <span class=3DSp=
ellE>dfks</span>
<span class=3DSpellE>fdjsk</span> <span class=3DSpellE>fjskdfjskdf</span> <=
span
class=3DSpellE>sjkf</span> <span class=3DSpellE>skjf</span> <span class=3DS=
pellE>sjkdf</span>
<span class=3DSpellE>skfjsfjk</span> s</p>

<p class=3DMsoNormal>The end.</p>

</div>

</body>

</html>

------=_NextPart_01CF5AE5.5C24CD00
Content-Location: file:///C:/D16BB227/testing_files/themedata.thmx
Content-Transfer-Encoding: base64
Content-Type: application/vnd.ms-officetheme

UEsDBBQABgAIAAAAIQCCirwT+gAAABwCAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbKyRy2rDMBBF
94X+g9C22HK6KKXYzqJJd30s0g8Y5LEtao+ENAnJ33fsuFC6CC10IxBizpl7Va6P46AOGJPzVOlV
XmiFZH3jqKv0++4pu9cqMVADgyes9AmTXtfXV+XuFDApmaZU6Z45PBiTbI8jpNwHJHlpfRyB5Ro7
E8B+QIfmtijujPXESJzxxNB1+SoLRNegeoPILzCKx7Cg8Pv5DCSAmAtYq8czYVqi0hDC4CywRDAH
an7oM9+2zmLj7X4UaT6DF9jNBDO/XGD1P+ov5wZb2A+stkfp4lx/xCH9LdtSay6Tc/7Uu5AuGC6X
t7Rh5r+tPwEAAP//AwBQSwMEFAAGAAgAAAAhAKXWp+fAAAAANgEAAAsAAABfcmVscy8ucmVsc4SP
z2rDMAyH74W9g9F9UdLDGCV2L6WQQy+jfQDhKH9oIhvbG+vbT8cGCrsIhKTv96k9/q6L+eGU5yAW
mqoGw+JDP8to4XY9v3+CyYWkpyUIW3hwhqN727VfvFDRozzNMRulSLYwlRIPiNlPvFKuQmTRyRDS
SkXbNGIkf6eRcV/XH5ieGeA2TNP1FlLXN2Cuj6jJ/7PDMMyeT8F/ryzlRQRuN5RMaeRioagv41O9
kKhlqtQe0LW4+db9AQAA//8DAFBLAwQUAAYACAAAACEAa3mWFoMAAACKAAAAHAAAAHRoZW1lL3Ro
ZW1lL3RoZW1lTWFuYWdlci54bWwMzE0KwyAQQOF9oXeQ2TdjuyhFYrLLrrv2AEOcGkHHoNKf29fl
44M3zt8U1ZtLDVksnAcNimXNLoi38Hwspxuo2kgcxSxs4ccV5ul4GMm0jRPfSchzUX0j1ZCFrbXd
INa1K9Uh7yzdXrkkaj2LR1fo0/cp4kXrKyYKAjj9AQAA//8DAFBLAwQUAAYACAAAACEAlrWt4pYG
AABQGwAAFgAAAHRoZW1lL3RoZW1lL3RoZW1lMS54bWzsWU9v2zYUvw/YdyB0b2MndhoHdYrYsZst
TRvEboceaYmW2FCiQNJJfRva44ABw7phhxXYbYdhW4EW2KX7NNk6bB3Qr7BHUpLFWF6SNtiKrT4k
Evnj+/8eH6mr1+7HDB0SISlP2l79cs1DJPF5QJOw7d0e9i+teUgqnASY8YS0vSmR3rWN99+7itdV
RGKCYH0i13Hbi5RK15eWpA/DWF7mKUlgbsxFjBW8inApEPgI6MZsablWW12KMU08lOAYyN4aj6lP
0FCT9DZy4j0Gr4mSesBnYqBJE2eFwQYHdY2QU9llAh1i1vaAT8CPhuS+8hDDUsFE26uZn7e0cXUJ
r2eLmFqwtrSub37ZumxBcLBseIpwVDCt9xutK1sFfQNgah7X6/W6vXpBzwCw74OmVpYyzUZ/rd7J
aZZA9nGedrfWrDVcfIn+ypzMrU6n02xlsliiBmQfG3P4tdpqY3PZwRuQxTfn8I3OZre76uANyOJX
5/D9K63Vhos3oIjR5GAOrR3a72fUC8iYs+1K+BrA12oZfIaCaCiiS7MY80QtirUY3+OiDwANZFjR
BKlpSsbYhyju4ngkKNYM8DrBpRk75Mu5Ic0LSV/QVLW9D1MMGTGj9+r596+eP0XHD54dP/jp+OHD
4wc/WkLOqm2chOVVL7/97M/HH6M/nn7z8tEX1XhZxv/6wye//Px5NRDSZybOiy+f/PbsyYuvPv39
u0cV8E2BR2X4kMZEopvkCO3zGBQzVnElJyNxvhXDCNPyis0klDjBmksF/Z6KHPTNKWaZdxw5OsS1
4B0B5aMKeH1yzxF4EImJohWcd6LYAe5yzjpcVFphR/MqmXk4ScJq5mJSxu1jfFjFu4sTx7+9SQp1
Mw9LR/FuRBwx9xhOFA5JQhTSc/yAkArt7lLq2HWX+oJLPlboLkUdTCtNMqQjJ5pmi7ZpDH6ZVukM
/nZss3sHdTir0nqLHLpIyArMKoQfEuaY8TqeKBxXkRzimJUNfgOrqErIwVT4ZVxPKvB0SBhHvYBI
WbXmlgB9S07fwVCxKt2+y6axixSKHlTRvIE5LyO3+EE3wnFahR3QJCpjP5AHEKIY7XFVBd/lbobo
d/ADTha6+w4ljrtPrwa3aeiINAsQPTMR2pdQqp0KHNPk78oxo1CPbQxcXDmGAvji68cVkfW2FuJN
2JOqMmH7RPldhDtZdLtcBPTtr7lbeJLsEQjz+Y3nXcl9V3K9/3zJXZTPZy20s9oKZVf3DbYpNi1y
vLBDHlPGBmrKyA1pmmQJ+0TQh0G9zpwOSXFiSiN4zOq6gwsFNmuQ4OojqqJBhFNosOueJhLKjHQo
UcolHOzMcCVtjYcmXdljYVMfGGw9kFjt8sAOr+jh/FxQkDG7TWgOnzmjFU3grMxWrmREQe3XYVbX
Qp2ZW92IZkqdw61QGXw4rxoMFtaEBgRB2wJWXoXzuWYNBxPMSKDtbvfe3C3GCxfpIhnhgGQ+0nrP
+6hunJTHirkJgNip8JE+5J1itRK3lib7BtzO4qQyu8YCdrn33sRLeQTPvKTz9kQ6sqScnCxBR22v
1VxuesjHadsbw5kWHuMUvC51z4dZCBdDvhI27E9NZpPlM2+2csXcJKjDNYW1+5zCTh1IhVRbWEY2
NMxUFgIs0Zys/MtNMOtFKWAj/TWkWFmDYPjXpAA7uq4l4zHxVdnZpRFtO/ualVI+UUQMouAIjdhE
7GNwvw5V0CegEq4mTEXQL3CPpq1tptzinCVd+fbK4Ow4ZmmEs3KrUzTPZAs3eVzIYN5K4oFulbIb
5c6vikn5C1KlHMb/M1X0fgI3BSuB9oAP17gCI52vbY8LFXGoQmlE/b6AxsHUDogWuIuFaQgquEw2
/wU51P9tzlkaJq3hwKf2aYgEhf1IRYKQPShLJvpOIVbP9i5LkmWETESVxJWpFXtEDgkb6hq4qvd2
D0UQ6qaaZGXA4E7Gn/ueZdAo1E1OOd+cGlLsvTYH/unOxyYzKOXWYdPQ5PYvRKzYVe16szzfe8uK
6IlZm9XIswKYlbaCVpb2rynCObdaW7HmNF5u5sKBF+c1hsGiIUrhvgfpP7D/UeEz+2VCb6hDvg+1
FcGHBk0Mwgai+pJtPJAukHZwBI2THbTBpElZ02atk7ZavllfcKdb8D1hbC3ZWfx9TmMXzZnLzsnF
izR2ZmHH1nZsoanBsydTFIbG+UHGOMZ80ip/deKje+DoLbjfnzAlTTDBNyWBofUcmDyA5LcczdKN
vwAAAP//AwBQSwMEFAAGAAgAAAAhAA3RkJ+2AAAAGwEAACcAAAB0aGVtZS90aGVtZS9fcmVscy90
aGVtZU1hbmFnZXIueG1sLnJlbHOEj00KwjAUhPeCdwhvb9O6EJEm3YjQrdQDhOQ1DTY/JFHs7Q2u
LAguh2G+mWm7l53JE2My3jFoqhoIOumVcZrBbbjsjkBSFk6J2TtksGCCjm837RVnkUsoTSYkUigu
MZhyDidKk5zQilT5gK44o49W5CKjpkHIu9BI93V9oPGbAXzFJL1iEHvVABmWUJr/s/04GolnLx8W
Xf5RQXPZhQUoosbM4CObqkwEylu6usTfAAAA//8DAFBLAQItABQABgAIAAAAIQCCirwT+gAAABwC
AAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0AFAAGAAgAAAAhAKXW
p+fAAAAANgEAAAsAAAAAAAAAAAAAAAAAKwEAAF9yZWxzLy5yZWxzUEsBAi0AFAAGAAgAAAAhAGt5
lhaDAAAAigAAABwAAAAAAAAAAAAAAAAAFAIAAHRoZW1lL3RoZW1lL3RoZW1lTWFuYWdlci54bWxQ
SwECLQAUAAYACAAAACEAlrWt4pYGAABQGwAAFgAAAAAAAAAAAAAAAADRAgAAdGhlbWUvdGhlbWUv
dGhlbWUxLnhtbFBLAQItABQABgAIAAAAIQAN0ZCftgAAABsBAAAnAAAAAAAAAAAAAAAAAJsJAAB0
aGVtZS90aGVtZS9fcmVscy90aGVtZU1hbmFnZXIueG1sLnJlbHNQSwUGAAAAAAUABQBdAQAAlgoA
AAAA

------=_NextPart_01CF5AE5.5C24CD00
Content-Location: file:///C:/D16BB227/testing_files/colorschememapping.xml
Content-Transfer-Encoding: quoted-printable
Content-Type: text/xml

<?xml version=3D"1.0" encoding=3D"UTF-8" standalone=3D"yes"?>
<a:clrMap xmlns:a=3D"http://schemas.openxmlformats.org/drawingml/2006/main"=
 bg1=3D"lt1" tx1=3D"dk1" bg2=3D"lt2" tx2=3D"dk2" accent1=3D"accent1" accent=
2=3D"accent2" accent3=3D"accent3" accent4=3D"accent4" accent5=3D"accent5" a=
ccent6=3D"accent6" hlink=3D"hlink" folHlink=3D"folHlink"/>
------=_NextPart_01CF5AE5.5C24CD00
Content-Location: file:///C:/D16BB227/testing_files/filelist.xml
Content-Transfer-Encoding: quoted-printable
Content-Type: text/xml; charset="utf-8"

<xml xmlns:o=3D"urn:schemas-microsoft-com:office:office">
 <o:MainFile HRef=3D"../testing.htm"/>
 <o:File HRef=3D"themedata.thmx"/>
 <o:File HRef=3D"colorschememapping.xml"/>
 <o:File HRef=3D"filelist.xml"/>
</xml>
------=_NextPart_01CF5AE5.5C24CD00--


================================================
FILE: libs/kotaemon/tests/resources/embedding_openai.json
================================================
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "index": 0,
      "embedding": [
        0.006555966101586819,
        0.003670461941510439,
        -0.011642491444945335,
        -0.026776473969221115,
        -0.012383491732180119,
        -0.0014341175556182861,
        -0.013375678099691868,
        0.009356695227324963,
        -0.006364436354488134,
        -0.0294390507042408,
        0.023950627073645592,
        0.0029859787318855524,
        -0.023234745487570763,
        -0.009205983020365238,
        0.006744355894625187,
        0.0011790062999352813,
        0.02607315219938755,
        -0.018437083810567856,
        0.008904559537768364,
        0.009620440192520618,
        -0.01306169480085373,
        -0.0011358336778357625,
        0.007253008428961039,
        0.00875384733080864,
        -0.012710033915936947,
        0.0037206991109997034,
        0.005419347435235977,
        -0.017243949696421623,
        0.036246202886104584,
        -0.0266759991645813,
        0.012647237628698349,
        -0.008552898652851582,
        -0.00762350857257843,
        -0.012546762824058533,
        0.007083457428961992,
        -0.014078999869525433,
        0.0048761568032205105,
        -0.013689660467207432,
        0.018211016431450844,
        -0.014367864467203617,
        0.008307991549372673,
        0.006022194866091013,
        0.005457025486975908,
        -0.006320478860288858,
        -0.03609549254179001,
        0.012986338697373867,
        0.00017592862423043698,
        -0.016427593305706978,
        -0.004734864458441734,
        0.0257717277854681,
        0.01944182999432087,
        -0.00211938563734293,
        -0.0147195253521204,
        0.01332544069737196,
        -0.017670966684818268,
        0.0013956546317785978,
        -0.04024006798863411,
        0.024754423648118973,
        0.032051388174295425,
        -0.018713390454649925,
        0.011265711858868599,
        0.012370931915938854,
        -0.013576626777648926,
        0.0040974789299070835,
        -0.002153923735022545,
        0.009915584698319435,
        0.006637601647526026,
        0.011353626847267151,
        -0.019919084385037422,
        0.030142372474074364,
        0.01966789737343788,
        -0.008213796652853489,
        -0.004213652573525906,
        -0.009162025526165962,
        0.012446288019418716,
        0.008979915641248226,
        -0.02098662778735161,
        0.003113141981884837,
        0.0018838982796296477,
        -0.0008289152756333351,
        0.02722861059010029,
        -0.03355850651860237,
        -0.004408321809023619,
        0.017859356477856636,
        0.019893966615200043,
        -0.0014137086691334844,
        -0.0021602034103125334,
        0.017959831282496452,
        -0.0114603815600276,
        -0.020069796591997147,
        0.007604669313877821,
        0.001281835837289691,
        0.01132222916930914,
        0.006668999791145325,
        -0.01743233948945999,
        0.0048761568032205105,
        0.0016923686489462852,
        0.02356128767132759,
        -0.005796127021312714,
        -0.041470881551504135,
        -0.01292354241013527,
        -0.005824385676532984,
        -0.010700542479753494,
        -0.017005322501063347,
        -0.007950050756335258,
        0.011560855433344841,
        -0.005485283676534891,
        -0.0002590360236354172,
        0.028434304520487785,
        0.007447678130120039,
        -0.026198744773864746,
        0.009306457825005054,
        -0.00500488979741931,
        -0.03107176162302494,
        -0.0007555217016488314,
        -0.008056805469095707,
        0.007479076273739338,
        0.003978165332227945,
        -0.005984516814351082,
        -0.02778122015297413,
        0.015272135846316814,
        0.029338575899600983,
        0.010964288376271725,
        -0.023548727855086327,
        0.007014381233602762,
        0.0014137086691334844,
        -0.03380969539284706,
        -0.016741575673222542,
        -0.005230957642197609,
        -0.004794521257281303,
        0.0315992534160614,
        0.0041100382804870605,
        0.010644025169312954,
        -0.0014294078573584557,
        -0.035367049276828766,
        0.0246916264295578,
        -0.007096016779541969,
        0.02096150815486908,
        -0.017030440270900726,
        -0.011906237341463566,
        0.006273381412029266,
        0.021237812936306,
        0.01757049188017845,
        -0.013224965892732143,
        -0.005092805251479149,
        0.01288586389273405,
        0.004486817866563797,
        -0.016339678317308426,
        0.008917118422687054,
        -0.015083746053278446,
        0.008377067744731903,
        0.0185626782476902,
        0.012961219996213913,
        -0.002535413019359112,
        0.0050143091939389706,
        0.016025694087147713,
        0.004618690814822912,
        0.0205470509827137,
        0.008000288158655167,
        -0.013074253685772419,
        -0.0007264782907441258,
        -0.0004474258457776159,
        0.0033125211484730244,
        -0.03084569424390793,
        0.009400652721524239,
        0.015787066891789436,
        0.02607315219938755,
        0.003689300734549761,
        -0.013940847478806973,
        -0.02604803442955017,
        -0.011454101651906967,
        0.007340923883020878,
        -0.043178949505090714,
        0.02660064399242401,
        -0.01593777909874916,
        0.00514304218813777,
        -0.001305384561419487,
        0.022820288315415382,
        -0.012697474099695683,
        -0.017269067466259003,
        -0.03024284727871418,
        0.011617372743785381,
        0.008992474526166916,
        0.025897322222590446,
        0.00629850011318922,
        -0.00014511904737446457,
        0.009890465997159481,
        -0.006364436354488134,
        0.00384629238396883,
        -0.017030440270900726,
        0.016666220501065254,
        0.03333244100213051,
        0.021011745557188988,
        0.01844964362680912,
        -0.6892555952072144,
        -0.007196491584181786,
        0.025319593027234077,
        0.02024562656879425,
        0.006245122756808996,
        0.009909304790198803,
        0.012176262214779854,
        0.027806337922811508,
        0.006518288049846888,
        0.0274546779692173,
        -0.02107454277575016,
        0.013011457398533821,
        -0.015083746053278446,
        -0.010719381272792816,
        -0.0002598209830466658,
        -0.00823263544589281,
        0.0055763390846550465,
        -0.012339534237980843,
        -0.011881118640303612,
        0.015196779742836952,
        -0.010141652077436447,
        0.03129782900214195,
        -0.022104406729340553,
        -0.014568813145160675,
        0.009928143583238125,
        0.011730406433343887,
        0.0025102945510298014,
        -0.009739753790199757,
        -0.006964143831282854,
        0.030192609876394272,
        -0.0021272350568324327,
        0.0114603815600276,
        9.394961671205238e-05,
        0.014154355973005295,
        0.06611227244138718,
        0.007610949221998453,
        -0.005463304929435253,
        0.008088203147053719,
        0.009827669709920883,
        0.028986915946006775,
        -0.019077610224485397,
        -0.022267676889896393,
        0.012559321708977222,
        -0.01662854291498661,
        -0.002763050841167569,
        0.009205983020365238,
        0.012659796513617039,
        -0.0024051100481301546,
        0.0022543983068317175,
        0.0017849936848506331,
        0.014568813145160675,
        -0.004851038102060556,
        0.010857533663511276,
        0.019504627212882042,
        -0.005343991331756115,
        0.0071588135324418545,
        0.02151411771774292,
        0.011962753720581532,
        -0.002935741562396288,
        0.004521355964243412,
        0.0005718416068702936,
        0.017168592661619186,
        -0.0016280021518468857,
        0.0038494321051985025,
        -0.022443508729338646,
        0.02381247468292713,
        -0.021313169971108437,
        0.01400364376604557,
        -0.008929678238928318,
        -0.012050669640302658,
        -0.0018368008313700557,
        -0.007994008250534534,
        -0.018047746270895004,
        -0.014744644053280354,
        0.013965966179966927,
        0.028107762336730957,
        0.01681693270802498,
        -0.0033219405449926853,
        -0.006160347256809473,
        0.01966789737343788,
        0.010926609858870506,
        0.013237525708973408,
        -0.014066440984606743,
        -0.01764584705233574,
        0.02349849045276642,
        -0.019780931994318962,
        -0.030067017301917076,
        -0.016276881098747253,
        0.010430516675114632,
        0.011479220353066921,
        0.030192609876394272,
        0.0033250804990530014,
        -0.012370931915938854,
        -0.01980605162680149,
        0.02883620373904705,
        0.0005384809337556362,
        -0.011548296548426151,
        0.003821173682808876,
        0.027303965762257576,
        0.0004238771216478199,
        0.014933033846318722,
        -0.0028038686141371727,
        0.0003752097545657307,
        0.00652456795796752,
        0.010279805399477482,
        0.006656440440565348,
        -0.00809448305517435,
        0.033156611025333405,
        0.02444044128060341,
        -0.030142372474074364,
        0.006317338906228542,
        0.004094338975846767,
        -0.04071732237935066,
        0.02121269516646862,
        0.008471262641251087,
        -0.032001152634620667,
        0.0013022447237744927,
        0.016013136133551598,
        0.026374576613307,
        -0.014694406650960445,
        0.023159390315413475,
        0.0020424597896635532,
        0.013664542697370052,
        0.0057458896189928055,
        0.0041539957746863365,
        0.005303173791617155,
        0.0003883577883243561,
        0.004342385567724705,
        -0.01690484769642353,
        0.001676669460721314,
        0.020697763189673424,
        0.005133622791618109,
        0.00046273251064121723,
        -0.005651694722473621,
        0.009808829985558987,
        0.00202676048502326,
        0.009896745905280113,
        -0.017884474247694016,
        0.00862825382500887,
        0.020308423787355423,
        -0.007994008250534534,
        0.005127343349158764,
        -0.0029388812836259604,
        -0.0006303209811449051,
        0.011818322353065014,
        -0.018399406224489212,
        -0.012025550939142704,
        0.008678491227328777,
        0.00016974708705674857,
        -0.013677101582288742,
        0.01983116939663887,
        -0.007918652147054672,
        -0.03245328739285469,
        0.021162457764148712,
        -0.008044245652854443,
        -0.0002005566784646362,
        0.0036076651886105537,
        -0.029187863692641258,
        -0.03569359332323074,
        -0.028961796313524246,
        0.0027096737176179886,
        0.016364796087145805,
        -0.0071022966876626015,
        -0.005174440797418356,
        -0.01400364376604557,
        -0.023787355050444603,
        -0.024566033855080605,
        0.011648771353065968,
        0.005579478573054075,
        -0.029012033715844154,
        0.010857533663511276,
        -0.0049420930445194244,
        -0.025093525648117065,
        -0.0024961652234196663,
        -0.0030016780365258455,
        0.010694262571632862,
        -0.010681703686714172,
        -0.015724271535873413,
        -0.003249724628403783,
        -0.01731930486857891,
        0.006876228842884302,
        -0.009601601399481297,
        -0.009821389801800251,
        0.012176262214779854,
        0.02582196518778801,
        -0.010185610502958298,
        -0.0005722341011278331,
        0.02747979573905468,
        -0.019793491810560226,
        -0.0016405613860115409,
        0.017005322501063347,
        0.0072027710266411304,
        -0.02220488153398037,
        0.0006103045307099819,
        -0.007912373170256615,
        -0.004559034015983343,
        -0.0010808866936713457,
        0.0038023346569389105,
        0.01679181307554245,
        0.01718115247786045,
        0.018713390454649925,
        0.004100618418306112,
        0.017859356477856636,
        -0.026022914797067642,
        -0.016013136133551598,
        -0.028811084106564522,
        -0.0023705719504505396,
        -0.030343322083353996,
        0.003858851734548807,
        0.013890610076487064,
        0.011969033628702164,
        -0.002329754177480936,
        -0.0014223431935533881,
        0.01764584705233574,
        0.016276881098747253,
        0.02692718617618084,
        0.0021821821574121714,
        -0.020923830568790436,
        -0.02855989895761013,
        -0.007510474417358637,
        -0.002681415295228362,
        0.008326830342411995,
        -0.01458137296140194,
        -0.0007727907504886389,
        0.01720627024769783,
        0.032604001462459564,
        0.004417741671204567,
        0.03554287925362587,
        0.003953046631067991,
        -0.0322021022439003,
        -0.02505584806203842,
        0.009231101721525192,
        0.012659796513617039,
        0.009563923813402653,
        -0.016842050477862358,
        -0.005513542331755161,
        0.0041414364241063595,
        -0.014405542053282261,
        0.02916274592280388,
        0.012163703329861164,
        -0.008502661250531673,
        0.010185610502958298,
        0.028710609301924706,
        -0.0177839994430542,
        0.00950112659484148,
        0.028384067118167877,
        0.014744644053280354,
        -0.009444610215723515,
        0.005055127199739218,
        0.02803240716457367,
        -0.0009089809027500451,
        0.0004933458403684199,
        -0.014405542053282261,
        -0.008433585055172443,
        0.0012111896649003029,
        -0.031498778611421585,
        0.0004556679050438106,
        -0.021388525143265724,
        0.019605102017521858,
        0.03569359332323074,
        0.013689660467207432,
        0.01161109283566475,
        -0.0064680506475269794,
        -0.005783567670732737,
        0.009205983020365238,
        -0.013413355685770512,
        -0.001551076304167509,
        -0.013488711789250374,
        0.003160239430144429,
        -0.005114783998578787,
        -0.012408610433340073,
        -0.003158669453114271,
        0.0335836261510849,
        -0.026274101808667183,
        0.009319016709923744,
        0.0005424057017080486,
        -0.0011326938401907682,
        -0.008841762319207191,
        -0.005205838941037655,
        0.0030566249042749405,
        -0.013036576099693775,
        -0.022443508729338646,
        -0.0017865635454654694,
        0.011830881237983704,
        0.007096016779541969,
        -0.01605081371963024,
        -0.024038542062044144,
        -0.005039427895098925,
        -0.0011122849537059665,
        -0.0032779830507934093,
        -0.008571737445890903,
        0.0041194576770067215,
        0.014292508363723755,
        -0.02342313528060913,
        0.015485644340515137,
        -0.002646877197548747,
        0.020722880959510803,
        0.00680087273940444,
        -0.009865347295999527,
        -0.00600649556145072,
        0.021036865189671516,
        0.020371221005916595,
        -0.0016107329865917563,
        -0.019291117787361145,
        0.013287762179970741,
        -0.004006423521786928,
        -0.00762350857257843,
        -0.015334932133555412,
        0.0008681631297804415,
        -0.008295431733131409,
        -0.0007543442770838737,
        0.011523177847266197,
        -0.0027818898670375347,
        0.0037332584615796804,
        0.01304913591593504,
        -0.005237237084656954,
        -0.00340043636970222,
        0.016691338270902634,
        0.02612338960170746,
        -0.002295216079801321,
        -0.008333110250532627,
        -0.009683237411081791,
        -0.022305356338620186,
        -0.00935041531920433,
        0.06972935795783997,
        -0.001001605880446732,
        -0.0031319810077548027,
        0.019617659971117973,
        -0.019014813005924225,
        -0.012848186306655407,
        -0.04051637277007103,
        -0.018223576247692108,
        -0.01665366068482399,
        -0.0031979172490537167,
        -0.004436580464243889,
        0.004458559211343527,
        -0.013802695088088512,
        0.01994420401751995,
        0.013551508076488972,
        0.009080389514565468,
        0.00340043636970222,
        -0.027052778750658035,
        -0.011341067962348461,
        0.0048667374067008495,
        -0.0151088647544384,
        0.010110254399478436,
        0.00046077012666501105,
        0.009990940801799297,
        0.0205470509827137,
        0.008307991549372673,
        0.00277247023768723,
        0.01720627024769783,
        -0.007460237015038729,
        -0.0038965295534580946,
        0.008483821526169777,
        -0.00610383041203022,
        0.01745745725929737,
        0.008778966031968594,
        0.013312880881130695,
        0.014078999869525433,
        0.0029043431859463453,
        0.03780356049537659,
        0.008464982733130455,
        -0.005384809337556362,
        0.011868558824062347,
        0.005287474486976862,
        -0.006179186515510082,
        -0.006009635515511036,
        0.0142673896625638,
        0.0023344638757407665,
        -0.0057710083201527596,
        0.028660371899604797,
        -0.0014239131705835462,
        -0.028685491532087326,
        0.023121710866689682,
        0.007742822170257568,
        -0.009218541905283928,
        -0.023071475327014923,
        -0.01187483873218298,
        0.018072864040732384,
        0.00027355772908777,
        -0.013928287662565708,
        -0.010844974778592587,
        -0.017796559259295464,
        -0.014744644053280354,
        -0.01513398252427578,
        -0.014694406650960445,
        -0.001071467180736363,
        -0.031172236427664757,
        -0.019190644845366478,
        -0.02369944006204605,
        0.002376851625740528,
        -0.007466516923159361,
        0.001561280689202249,
        -0.0048981355503201485,
        -0.03863247483968735,
        -0.0125404829159379,
        -0.0036516229156404734,
        0.01909017004072666,
        0.0004517431079875678,
        -0.001898027490824461,
        -0.006486889906227589,
        0.004044101573526859,
        0.013802695088088512,
        -0.006047313567250967,
        -0.029012033715844154,
        0.011271991766989231,
        -0.006292220205068588,
        0.015611236914992332,
        0.0031727987807244062,
        0.00666272034868598,
        0.009111788123846054,
        -0.01690484769642353,
        0.029589762911200523,
        0.008521500043570995,
        0.015071186237037182,
        0.026299219578504562,
        0.0003149642434436828,
        0.018650593236088753,
        0.004537055268883705,
        0.0050582666881382465,
        0.001783423707820475,
        -0.019906524568796158,
        -1.6802998288767412e-05,
        -0.008307991549372673,
        -0.01179320365190506,
        -0.0020785678643733263,
        0.004810220096260309,
        -0.0034695127978920937,
        0.01676669530570507,
        0.01690484769642353,
        0.01568659394979477,
        -0.015372609719634056,
        -0.0016327118501067162,
        0.034211594611406326,
        -0.024088779464364052,
        0.016251763328909874,
        -0.0064806099981069565,
        0.0001965337578440085,
        0.013438474386930466,
        0.021564355120062828,
        0.03504050895571709,
        -0.004694046452641487,
        -0.03443766012787819,
        -0.005557499825954437,
        -0.025281915441155434,
        0.008490101434290409,
        0.0157996267080307,
        0.009130626916885376,
        0.0012418029364198446,
        -0.0018619195325300097,
        -0.022192321717739105,
        -0.006907626986503601,
        -0.0032748430967330933,
        -0.005548080429434776,
        0.019542304798960686,
        -0.006304779555648565,
        -0.0023250444792211056,
        -0.015309813432395458,
        -0.006499449256807566,
        -0.018324051052331924,
        0.005968817975372076,
        -0.006851110141724348,
        -0.017859356477856636,
        -0.02425205148756504,
        -0.0063236188143491745,
        -0.006568525452166796,
        -0.013890610076487064,
        -0.04551498219370842,
        -0.044786542654037476,
        -0.016163846477866173,
        0.00394362723454833,
        -0.017444897443056107,
        0.016364796087145805,
        -0.003949906677007675,
        -0.000322813808452338,
        -0.019981881603598595,
        -0.010625186376273632,
        -0.006342457607388496,
        -0.01692996546626091,
        -0.0006311059114523232,
        -0.006116389762610197,
        0.027002541348338127,
        0.027931932359933853,
        0.025369830429553986,
        0.013978525064885616,
        0.0114603815600276,
        0.006474330555647612,
        -0.008559177629649639,
        -0.01662854291498661,
        0.004559034015983343,
        -0.012094627134501934,
        -0.014204593375325203,
        0.012710033915936947,
        0.020999185740947723,
        0.028459424152970314,
        -0.007026940584182739,
        0.009595322422683239,
        0.0048447586596012115,
        -0.008188677951693535,
        -0.020873593166470528,
        0.0010173050686717033,
        -0.013928287662565708,
        -0.014405542053282261,
        -0.020974067971110344,
        -0.0010471334680914879,
        0.002153923735022545,
        0.010958008468151093,
        0.011234313249588013,
        -0.015548440627753735,
        0.011812042444944382,
        0.008596856147050858,
        0.02079823799431324,
        0.003050345228984952,
        0.026299219578504562,
        -0.01969301700592041,
        0.02742955833673477,
        0.007265567779541016,
        0.020597288385033607,
        -0.013162169605493546,
        0.007711423560976982,
        -0.006794593296945095,
        0.016917407512664795,
        0.009262500330805779,
        0.021451322361826897,
        0.02770586498081684,
        -0.012270457111299038,
        -0.010593787766993046,
        0.005884042475372553,
        0.026173627004027367,
        0.0031162817031145096,
        0.007510474417358637,
        0.004194813314825296,
        -0.007862135767936707,
        0.0027096737176179886,
        -0.020195389166474342,
        -0.004998610354959965,
        -0.013501270674169064,
        0.004712885711342096,
        -0.012201380915939808,
        -0.01872594840824604,
        0.018625473603606224,
        -0.011881118640303612,
        -0.029087388888001442,
        -0.015787066891789436,
        -0.0003167304093949497,
        0.048001728951931,
        0.005959398113191128,
        0.012100907042622566,
        0.01939159259200096,
        0.0028556757606565952,
        -0.005940559320151806,
        0.02068520337343216,
        -0.018675711005926132,
        -0.005522961728274822,
        0.039813049137592316,
        0.0015455815009772778,
        -0.0063236188143491745,
        -0.02416413463652134,
        0.004684627056121826,
        0.004078639671206474,
        -0.015071186237037182,
        -0.024051101878285408,
        0.008050525560975075,
        0.028760846704244614,
        -0.002221429953351617,
        -0.014619050547480583,
        0.006113249808549881,
        -0.033131491392850876,
        0.02471674606204033,
        0.003482071915641427,
        -0.021363407373428345,
        -0.015058627352118492,
        0.012440008111298084,
        -0.023071475327014923,
        0.0088919997215271,
        -0.0305191520601511,
        0.02720349095761776,
        0.03413623571395874,
        -0.005836945027112961,
        -0.006668999791145325,
        0.003796054981648922,
        0.00045880774268880486,
        -0.0013477721950039268,
        -0.007686304859817028,
        0.02833382971584797,
        -0.007058338727802038,
        0.0036641822662204504,
        0.01891433820128441,
        -0.0021272350568324327,
        -0.02079823799431324,
        -0.005208978895097971,
        -0.012320694513618946,
        0.022581661120057106,
        -0.017331864684820175,
        0.005482144188135862,
        -0.011899957433342934,
        0.011328508146107197,
        -0.003271703375503421,
        0.012075788341462612,
        -0.013790135271847248,
        -0.00652456795796752,
        -0.006210584659129381,
        -0.0016672499477863312,
        0.02665088139474392,
        2.0819775272684637e-06,
        -0.001816391944885254,
        -0.01485767774283886,
        -0.017984949052333832,
        -0.024503236636519432,
        -0.04712257534265518,
        -0.004483677912503481,
        0.0015801197150722146,
        -0.029589762911200523,
        -0.004659508354961872,
        -0.005466444883495569,
        0.009551363997161388,
        0.02565869502723217,
        0.016779253259301186,
        -0.012358373031020164,
        0.012553042732179165,
        0.023247305303812027,
        -0.0181733388453722,
        -0.0057301907800138,
        0.018688270822167397,
        0.015975456684827805,
        -0.02584708482027054,
        0.016251763328909874,
        -0.0035699873697012663,
        -0.019178085029125214,
        0.004577872809022665,
        -0.014104118570685387,
        -0.004160275217145681,
        0.011585974134504795,
        0.013388236984610558,
        0.00026119465474039316,
        -0.016113610938191414,
        0.006348737049847841,
        0.02936369553208351,
        0.016867170110344887,
        0.009036432020366192,
        -0.007322084624320269,
        -0.003296822076663375,
        0.037050001323223114,
        -0.014217152260243893,
        0.0041539957746863365,
        -0.007334643974900246,
        -0.012697474099695683,
        0.019655339419841766,
        -0.007052059285342693,
        -0.007667466066777706,
        -0.0007700434071011841,
        -0.03576894849538803,
        -0.003962466027587652,
        -0.0019985020626336336,
        0.008138440549373627,
        0.014342745766043663,
        -0.015837304294109344,
        -0.03210162743926048,
        -0.0056108771823346615,
        -0.016703898087143898,
        0.00034538135514594615,
        0.009984660893678665,
        -0.01095172856003046,
        0.001340707647614181,
        0.013840372674167156,
        0.016716457903385162,
        0.00038973146001808345,
        0.005940559320151806,
        -0.011535737663507462,
        -0.01093916967511177,
        -0.016314558684825897,
        -0.02427716925740242,
        -0.012634677812457085,
        -0.0012151143746450543,
        0.041244812309741974,
        0.006957864388823509,
        -0.018110541626811028,
        -0.0088919997215271,
        -0.005127343349158764,
        -0.02747979573905468,
        -0.03227745741605759,
        -0.002741072094067931,
        -0.0029655699618160725,
        -0.0056234365329146385,
        0.016741575673222542,
        -0.008527779951691628,
        0.022016491740942,
        0.006263962015509605,
        -0.0022748070769011974,
        -0.00584008451551199,
        -0.019630219787359238,
        0.011824601329863071,
        0.004703465849161148,
        0.01513398252427578,
        -0.000919970334507525,
        -0.02328498288989067,
        0.002163343131542206,
        0.0026296081487089396,
        0.011912517249584198,
        -0.0042073726654052734,
        0.00936297420412302,
        -0.009331576526165009,
        -0.010487033985555172,
        -0.014355304650962353,
        -0.0040284027345478535,
        -0.002763050841167569,
        0.007253008428961039,
        0.016364796087145805,
        -0.0157996267080307,
        0.001935705542564392,
        0.0050582666881382465,
        -0.003422415116801858,
        0.003422415116801858,
        -0.00468148710206151,
        0.008433585055172443,
        -0.015360050834715366,
        0.015837304294109344,
        -0.019981881603598595,
        -0.006320478860288858,
        0.003088023280724883,
        -0.02336033806204796,
        0.015159101225435734,
        -0.025420067831873894,
        0.00652456795796752,
        0.0294390507042408,
        0.01068798266351223,
        0.00756699126213789,
        -0.0008603135356679559,
        -0.0018933177925646305,
        0.0024160996545106173,
        0.005488423630595207,
        -0.00935041531920433,
        -0.011315949261188507,
        -2.2285437808022834e-05,
        -0.007372322026640177,
        -0.021149897947907448,
        -0.02966511808335781,
        0.0021617733873426914,
        -0.025721492245793343,
        -0.014330185949802399,
        0.001420773332938552,
        0.03476420417428017,
        0.015787066891789436,
        -0.026550406590104103,
        -0.01319984719157219,
        -0.023159390315413475,
        0.013011457398533821,
        0.009406931698322296,
        -0.013388236984610558,
        0.001562850666232407,
        -0.00962672010064125,
        -0.0042324913665652275,
        0.021916016936302185,
        -0.01762072928249836,
        0.005290614441037178,
        0.018537558615207672,
        0.016176406294107437,
        -0.004251330625265837,
        0.039260439574718475,
        0.2337038666009903,
        -0.012791668996214867,
        0.0035228899214416742,
        0.027354203164577484,
        0.02217976190149784,
        0.013551508076488972,
        0.010210729204118252,
        0.0017080678371712565,
        0.006982983089983463,
        0.0028289873152971268,
        -0.01159853395074606,
        0.0012826207093894482,
        -0.00470974575728178,
        0.006756915245205164,
        0.005450745578855276,
        0.00941949151456356,
        -0.02107454277575016,
        -0.019793491810560226,
        -0.030921051278710365,
        -0.024503236636519432,
        0.006279660854488611,
        -0.03295566141605377,
        -0.025256795808672905,
        -0.014305067248642445,
        0.017495134845376015,
        -0.0072969659231603146,
        -0.008446143940091133,
        0.0011868559522554278,
        0.03363386541604996,
        0.009746033698320389,
        0.004763122648000717,
        -0.011573415249586105,
        0.011228034272789955,
        0.011246873065829277,
        -0.014367864467203617,
        -0.019567424431443214,
        0.03604525327682495,
        0.013363118283450603,
        0.03084569424390793,
        0.003871411085128784,
        -0.016553185880184174,
        -0.025068406015634537,
        -0.004518216010183096,
        -0.017633287236094475,
        0.0027238030452281237,
        0.011096160858869553,
        0.007246728986501694,
        -0.026173627004027367,
        0.018123101443052292,
        0.033985525369644165,
        0.004656368400901556,
        0.007014381233602762,
        0.02300867810845375,
        0.044032983481884,
        0.00041406514355912805,
        -0.015950338914990425,
        0.008653372526168823,
        0.016666220501065254,
        -0.005259216297417879,
        -0.000553002639207989,
        0.0005902881384827197,
        0.03185044229030609,
        -2.5854542400338687e-05,
        0.03943626955151558,
        -0.023598965257406235,
        0.007472796365618706,
        -0.02519400045275688,
        -0.002959290286526084,
        0.003968745935708284,
        -0.0035166102461516857,
        -0.01667878031730652,
        -0.0014144936576485634,
        0.00603475421667099,
        -0.00840846635401249,
        -0.029514405876398087,
        -0.009519966319203377,
        0.010499592870473862,
        0.0036798813380301,
        0.03335756063461304,
        0.0274546779692173,
        -0.009388092905282974,
        -0.0015463664894923568,
        -0.009859067387878895,
        -0.02010747417807579,
        -0.038657594472169876,
        -0.029037151485681534,
        0.0022512583527714014,
        -0.00729068648070097,
        -0.012107186019420624,
        -0.023372897878289223,
        -0.014531135559082031,
        -0.010254686698317528,
        0.001805402571335435,
        -0.006069292314350605,
        0.023686882108449936,
        0.026198744773864746,
        -0.0034569534473121166,
        0.010612627491354942,
        -0.02091127075254917,
        0.012364652007818222,
        -0.005510402377694845,
        -0.00246790680103004,
        7.820140308467671e-05,
        0.0090866694226861,
        0.0022292796056717634,
        0.009903025813400745,
        -0.0071148560382425785,
        0.0007563066901639104,
        -0.00300324778072536,
        -0.01563635654747486,
        -0.022280236706137657,
        -0.0016735296230763197,
        -0.006807152647525072,
        -0.01108988095074892,
        -0.00680087273940444,
        0.00976487249135971,
        -0.005080245900899172,
        -0.00265472661703825,
        -0.0003416528052184731,
        -0.008885719813406467,
        -0.003959326073527336,
        -0.013915728777647018,
        -0.014794881455600262,
        0.002414529677480459,
        -0.006169767118990421,
        -0.014807440340518951,
        -0.006888788193464279,
        0.00915574561804533,
        0.014531135559082031,
        -0.030016779899597168,
        0.00854661874473095,
        -0.0002739502233453095,
        0.021237812936306,
        0.008113321848213673,
        -0.006989262532442808,
        0.007334643974900246,
        0.016992762684822083,
        -0.0028666651342064142,
        -0.016879728063941002,
        0.0035448686685413122,
        -0.0021131059620529413,
        -0.010003499686717987,
        0.0088919997215271,
        -0.0073534827679395676,
        -0.014920474030077457,
        -0.01649039052426815,
        0.03112199902534485,
        -0.027555152773857117,
        -0.010970567353069782,
        0.002863525412976742,
        -0.04272681474685669,
        -0.018185898661613464,
        -0.013903168961405754,
        -0.0035385889932513237,
        0.027931932359933853,
        -0.020735440775752068,
        -0.019680457189679146,
        -0.02485489845275879,
        -0.006210584659129381,
        0.009896745905280113,
        -0.012502805329859257,
        0.009645558893680573,
        0.02996654249727726,
        -0.007862135767936707,
        -0.038657594472169876,
        -0.013149609789252281,
        -0.16045789420604706,
        0.014430660754442215,
        0.011523177847266197,
        -0.012760271318256855,
        0.02066008374094963,
        0.015171661041676998,
        0.024641389027237892,
        0.004643809515982866,
        -0.009067830629646778,
        0.018575238063931465,
        0.009859067387878895,
        -0.01288586389273405,
        -0.014945592731237411,
        -0.014305067248642445,
        0.005290614441037178,
        -0.01612616889178753,
        -0.008182398043572903,
        0.012352093122899532,
        0.029740475118160248,
        0.028685491532087326,
        0.035115864127874374,
        -0.007334643974900246,
        -0.0009168304968625307,
        -0.00477882195264101,
        0.013714779168367386,
        -0.007811898365616798,
        0.0007111715967766941,
        0.03393528610467911,
        -0.010053737089037895,
        -0.007020661141723394,
        -0.016000576317310333,
        -0.02888644114136696,
        0.02582196518778801,
        0.004559034015983343,
        0.021966254338622093,
        -0.006430373061448336,
        0.012446288019418716,
        -0.042601220309734344,
        -0.01635223627090454,
        0.024930253624916077,
        0.032855186611413956,
        0.018625473603606224,
        0.019228322431445122,
        -0.013526389375329018,
        -0.015410288237035275,
        -0.005224677734076977,
        0.020609848201274872,
        0.015083746053278446,
        0.01621408388018608,
        -0.0070897373370826244,
        0.008119601756334305,
        -0.003028366481885314,
        -0.0005110073834657669,
        -0.00622942391782999,
        0.019981881603598595,
        0.029489288106560707,
        0.006888788193464279,
        0.02452835626900196,
        0.0024773261975497007,
        -0.008458703756332397,
        -0.0002564848982729018,
        -0.012370931915938854,
        -0.006254542153328657,
        -0.004706605803221464,
        0.006050453521311283,
        -0.005224677734076977,
        0.0014686556532979012,
        0.009689517319202423,
        -0.00024942029267549515,
        0.018349168822169304,
        -0.008910838514566422,
        -0.023084033280611038,
        -0.01039911899715662,
        -0.02554566040635109,
        0.006825991440564394,
        0.008389626629650593,
        -0.03378457576036453,
        0.03109688125550747,
        -0.0114603815600276,
        -0.014832559041678905,
        -0.008295431733131409,
        0.021313169971108437,
        -0.0024836058728396893,
        0.007070898078382015,
        -0.023121710866689682,
        0.00038973146001808345,
        0.0032465846743434668,
        0.012653516605496407,
        -0.03134806826710701,
        -0.027002541348338127,
        0.007453957572579384,
        -0.017984949052333832,
        -0.014166914857923985,
        -0.018022626638412476,
        0.00543504673987627,
        0.015598678030073643,
        0.012534203007817268,
        0.01623920351266861,
        -0.01571171171963215,
        -0.019893966615200043,
        -0.002260677982121706,
        0.017143474891781807,
        -0.016013136133551598,
        0.011190355755388737,
        0.03049403429031372,
        -0.005469584837555885,
        0.017532814294099808,
        0.007981449365615845,
        0.022405831143260002,
        -0.03169972822070122,
        -0.03217698261141777,
        0.006631322205066681,
        0.014694406650960445,
        0.026148507371544838,
        0.01623920351266861,
        0.026449931785464287,
        -0.013262644410133362,
        -0.01734442450106144,
        0.0048321993090212345,
        -0.013099372386932373,
        0.052045829594135284,
        0.006223144009709358,
        -0.04420881345868111,
        -0.0010887362295761704,
        0.0028792244847863913,
        0.0002376459160586819,
        -0.08409722149372101,
        -0.03438742458820343,
        0.004712885711342096,
        0.014405542053282261,
        -0.001269276486709714,
        0.03835617005825043,
        0.004216792527586222,
        -0.0017881334060803056,
        0.0019875126890838146,
        0.005535521078854799,
        0.008923398330807686,
        -0.03049403429031372,
        -0.01787191443145275,
        -0.010568669065833092,
        0.04071732237935066,
        -0.02720349095761776,
        0.012779110111296177,
        0.0010651875054463744,
        -0.033960405737161636,
        0.011604813858866692,
        -0.008678491227328777,
        0.007868414744734764,
        0.0028478263411670923,
        -0.001828951295465231,
        -0.02222999930381775,
        0.012201380915939808,
        -0.022971000522375107,
        0.03127271309494972,
        0.0016358516877517104,
        -0.004926394205540419,
        -0.016754135489463806,
        -0.024063661694526672,
        -0.014166914857923985,
        -0.008307991549372673,
        -0.004449139814823866,
        0.013626864179968834,
        -0.04099362716078758,
        0.03102152608335018,
        0.013413355685770512,
        -0.04106898233294487,
        0.027253728359937668,
        0.010593787766993046,
        -0.006461771205067635,
        -0.05581362545490265,
        -0.012659796513617039,
        0.017168592661619186,
        -0.0019137266790494323,
        0.03777844086289406,
        0.01870083063840866,
        -0.009048991836607456,
        -0.03222722187638283,
        -0.03139830380678177,
        -0.021916016936302185,
        0.010832414962351322,
        0.01048075407743454,
        -0.0017535953084006906,
        0.008056805469095707,
        0.028911558911204338,
        -0.009921864606440067,
        0.011962753720581532,
        0.013375678099691868,
        0.006147787906229496,
        -0.02038377895951271,
        0.01704300008714199,
        -0.012559321708977222,
        0.018575238063931465,
        -0.012697474099695683,
        0.0232221856713295,
        0.0201577115803957,
        0.00969579629600048,
        -0.01563635654747486,
        0.028409186750650406,
        0.006245122756808996,
        0.010053737089037895,
        -0.03792915120720863,
        -0.005469584837555885,
        -0.015837304294109344,
        -0.020258186385035515,
        0.006606203503906727,
        -0.005811826325953007,
        -0.03310637176036835,
        -0.020735440775752068,
        0.009218541905283928,
        -0.008182398043572903,
        0.026022914797067642,
        0.01360174547880888,
        -0.01054983027279377,
        0.0057710083201527596,
        -0.015347491018474102,
        -0.03544240817427635,
        -0.03054427169263363,
        0.01035516057163477,
        0.006989262532442808,
        -0.011510618962347507,
        0.007529313676059246,
        0.014757202938199043,
        -0.01180576253682375,
        -0.0010895212180912495,
        0.01607593148946762,
        0.010807296261191368,
        -0.02485489845275879,
        -0.011824601329863071,
        -0.0650070533156395,
        0.032352812588214874,
        -0.016113610938191414,
        -0.019353915005922318,
        0.0009600031771697104,
        -0.022016491740942,
        0.006687839049845934,
        -0.01512142363935709,
        0.002095836913213134,
        -0.00881036464124918,
        -0.018675711005926132,
        -0.0048635974526405334,
        -0.002117815660312772,
        0.012615839019417763,
        -0.02798216976225376,
        0.0032685634214431047,
        0.020182831212878227,
        0.014192033559083939,
        0.005880902521312237,
        -0.002144504338502884,
        0.00037756460369564593,
        -0.03245328739285469,
        -0.016502948477864265,
        0.001036928966641426,
        -0.023774797096848488,
        0.011177796870470047,
        -0.0314234234392643,
        0.0005577123956754804,
        -0.002411389723420143,
        -0.00874128844588995,
        0.0064366525039076805,
        -0.042576100677251816,
        0.0064021144062280655,
        0.022682135924696922,
        0.0008108612382784486,
        -0.010122813284397125,
        0.006794593296945095,
        0.015083746053278446,
        0.007366042118519545,
        0.020333541557192802,
        -0.030895931646227837,
        -0.033181726932525635,
        0.015611236914992332,
        -0.010832414962351322,
        0.0036013855133205652,
        0.004634389653801918,
        -0.005340851843357086,
        -0.0033941566944122314,
        0.030418677255511284,
        0.009808829985558987,
        -0.003227745648473501,
        0.029263220727443695,
        -0.028409186750650406,
        -0.013036576099693775,
        -0.012841906398534775,
        -0.008351949043571949,
        -0.017143474891781807,
        -0.0003757984668482095,
        -0.008333110250532627,
        -0.04493725299835205,
        0.02248118631541729,
        0.00018220828496851027,
        0.02966511808335781,
        -0.007868414744734764,
        0.005246656946837902,
        0.010813576169312,
        -0.0205470509827137,
        -0.002692404668778181,
        -0.014619050547480583,
        -0.02234303392469883,
        -0.006920186337083578,
        -0.00593741936609149,
        0.00517130084335804,
        0.005312593188136816,
        0.019630219787359238,
        0.020647525787353516,
        -0.003180648200213909,
        0.0170932374894619,
        -0.040616847574710846,
        0.037728201597929,
        0.020258186385035515,
        0.021112220361828804,
        -0.030318202450871468,
        -0.0004332966054789722,
        0.034563254565000534,
        0.006088131107389927,
        -0.006254542153328657,
        -0.01731930486857891,
        -0.009205983020365238,
        0.00310058263130486,
        -0.03607037290930748,
        -0.0266759991645813,
        0.006939025595784187,
        0.024289729073643684,
        -0.0038368727546185255,
        0.00473800441250205,
        0.017520254477858543,
        0.00783073715865612,
        0.017658406868577003,
        0.023372897878289223,
        0.0023721419274806976,
        0.009438330307602882,
        -0.016000576317310333,
        -0.004552754107862711,
        -0.008785245940089226,
        0.005924860015511513,
        -0.006289080251008272,
        -0.037878915667533875,
        -0.007328364532440901,
        0.012898423708975315,
        0.011812042444944382,
        0.029790712520480156,
        -0.006361296400427818,
        0.015046067535877228,
        -0.02499305084347725,
        -0.00584008451551199,
        0.02609827183187008,
        -0.0014945593429729342,
        -0.028434304520487785,
        0.04242539033293724,
        -0.008973635733127594,
        0.0035794067662209272,
        0.02770586498081684,
        -0.015071186237037182,
        0.008433585055172443,
        0.002970279660075903,
        -0.007793059106916189,
        -0.0035228899214416742,
        0.01905249059200287,
        -0.0050017498433589935,
        0.011717847548425198,
        -0.010041178204119205,
        -0.016452711075544357,
        -0.015410288237035275,
        0.0009136906592175364,
        0.01372733898460865,
        -0.008351949043571949,
        0.024339966475963593,
        -0.0161889661103487,
        0.05631599947810173,
        -0.012722592800855637,
        -0.0041634151712059975,
        0.02287052571773529,
        0.014292508363723755,
        0.020735440775752068,
        0.024427881464362144,
        0.018072864040732384,
        -0.008559177629649639,
        -0.02024562656879425,
        -0.0038494321051985025,
        0.006756915245205164,
        -0.0008289152756333351,
        -0.02259422093629837,
        -0.011127559468150139,
        0.004289008677005768,
        -0.01591266132891178,
        0.004398902412503958,
        -0.019630219787359238,
        0.01292354241013527,
        0.018324051052331924,
        0.008942237123847008,
        0.0243902038782835,
        0.013212407007813454,
        -0.018072864040732384,
        -0.019931644201278687,
        0.00423877127468586,
        0.0012331684119999409,
        -0.018349168822169304,
        -0.016151288524270058,
        0.017243949696421623,
        0.0007649412145838141,
        -0.0012857605470344424,
        -0.01260955911129713,
        0.006656440440565348,
        0.002959290286526084,
        -0.007108576130121946,
        -0.009513686411082745,
        -0.0036422032862901688,
        0.010562390089035034,
        0.02289564348757267,
        0.0006240413058549166,
        -0.018097983673214912,
        -0.03295566141605377,
        0.006813432089984417,
        0.002750491490587592,
        -0.02936369553208351,
        -0.005384809337556362,
        -0.02317194826900959
      ]
    }
  ],
  "model": "ada",
  "usage": {
    "prompt_tokens": 3,
    "total_tokens": 3
  }
}


================================================
FILE: libs/kotaemon/tests/resources/embedding_openai_batch.json
================================================
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "index": 0,
      "embedding": [
        0.006555966101586819,
        0.003670461941510439,
        -0.011642491444945335,
        -0.026776473969221115,
        -0.012383491732180119,
        -0.0014341175556182861,
        -0.013375678099691868,
        0.009356695227324963,
        -0.006364436354488134,
        -0.0294390507042408,
        0.023950627073645592,
        0.0029859787318855524,
        -0.023234745487570763,
        -0.009205983020365238,
        0.006744355894625187,
        0.0011790062999352813,
        0.02607315219938755,
        -0.018437083810567856,
        0.008904559537768364,
        0.009620440192520618,
        -0.01306169480085373,
        -0.0011358336778357625,
        0.007253008428961039,
        0.00875384733080864,
        -0.012710033915936947,
        0.0037206991109997034,
        0.005419347435235977,
        -0.017243949696421623,
        0.036246202886104584,
        -0.0266759991645813,
        0.012647237628698349,
        -0.008552898652851582,
        -0.00762350857257843,
        -0.012546762824058533,
        0.007083457428961992,
        -0.014078999869525433,
        0.0048761568032205105,
        -0.013689660467207432,
        0.018211016431450844,
        -0.014367864467203617,
        0.008307991549372673,
        0.006022194866091013,
        0.005457025486975908,
        -0.006320478860288858,
        -0.03609549254179001,
        0.012986338697373867,
        0.00017592862423043698,
        -0.016427593305706978,
        -0.004734864458441734,
        0.0257717277854681,
        0.01944182999432087,
        -0.00211938563734293,
        -0.0147195253521204,
        0.01332544069737196,
        -0.017670966684818268,
        0.0013956546317785978,
        -0.04024006798863411,
        0.024754423648118973,
        0.032051388174295425,
        -0.018713390454649925,
        0.011265711858868599,
        0.012370931915938854,
        -0.013576626777648926,
        0.0040974789299070835,
        -0.002153923735022545,
        0.009915584698319435,
        0.006637601647526026,
        0.011353626847267151,
        -0.019919084385037422,
        0.030142372474074364,
        0.01966789737343788,
        -0.008213796652853489,
        -0.004213652573525906,
        -0.009162025526165962,
        0.012446288019418716,
        0.008979915641248226,
        -0.02098662778735161,
        0.003113141981884837,
        0.0018838982796296477,
        -0.0008289152756333351,
        0.02722861059010029,
        -0.03355850651860237,
        -0.004408321809023619,
        0.017859356477856636,
        0.019893966615200043,
        -0.0014137086691334844,
        -0.0021602034103125334,
        0.017959831282496452,
        -0.0114603815600276,
        -0.020069796591997147,
        0.007604669313877821,
        0.001281835837289691,
        0.01132222916930914,
        0.006668999791145325,
        -0.01743233948945999,
        0.0048761568032205105,
        0.0016923686489462852,
        0.02356128767132759,
        -0.005796127021312714,
        -0.041470881551504135,
        -0.01292354241013527,
        -0.005824385676532984,
        -0.010700542479753494,
        -0.017005322501063347,
        -0.007950050756335258,
        0.011560855433344841,
        -0.005485283676534891,
        -0.0002590360236354172,
        0.028434304520487785,
        0.007447678130120039,
        -0.026198744773864746,
        0.009306457825005054,
        -0.00500488979741931,
        -0.03107176162302494,
        -0.0007555217016488314,
        -0.008056805469095707,
        0.007479076273739338,
        0.003978165332227945,
        -0.005984516814351082,
        -0.02778122015297413,
        0.015272135846316814,
        0.029338575899600983,
        0.010964288376271725,
        -0.023548727855086327,
        0.007014381233602762,
        0.0014137086691334844,
        -0.03380969539284706,
        -0.016741575673222542,
        -0.005230957642197609,
        -0.004794521257281303,
        0.0315992534160614,
        0.0041100382804870605,
        0.010644025169312954,
        -0.0014294078573584557,
        -0.035367049276828766,
        0.0246916264295578,
        -0.007096016779541969,
        0.02096150815486908,
        -0.017030440270900726,
        -0.011906237341463566,
        0.006273381412029266,
        0.021237812936306,
        0.01757049188017845,
        -0.013224965892732143,
        -0.005092805251479149,
        0.01288586389273405,
        0.004486817866563797,
        -0.016339678317308426,
        0.008917118422687054,
        -0.015083746053278446,
        0.008377067744731903,
        0.0185626782476902,
        0.012961219996213913,
        -0.002535413019359112,
        0.0050143091939389706,
        0.016025694087147713,
        0.004618690814822912,
        0.0205470509827137,
        0.008000288158655167,
        -0.013074253685772419,
        -0.0007264782907441258,
        -0.0004474258457776159,
        0.0033125211484730244,
        -0.03084569424390793,
        0.009400652721524239,
        0.015787066891789436,
        0.02607315219938755,
        0.003689300734549761,
        -0.013940847478806973,
        -0.02604803442955017,
        -0.011454101651906967,
        0.007340923883020878,
        -0.043178949505090714,
        0.02660064399242401,
        -0.01593777909874916,
        0.00514304218813777,
        -0.001305384561419487,
        0.022820288315415382,
        -0.012697474099695683,
        -0.017269067466259003,
        -0.03024284727871418,
        0.011617372743785381,
        0.008992474526166916,
        0.025897322222590446,
        0.00629850011318922,
        -0.00014511904737446457,
        0.009890465997159481,
        -0.006364436354488134,
        0.00384629238396883,
        -0.017030440270900726,
        0.016666220501065254,
        0.03333244100213051,
        0.021011745557188988,
        0.01844964362680912,
        -0.6892555952072144,
        -0.007196491584181786,
        0.025319593027234077,
        0.02024562656879425,
        0.006245122756808996,
        0.009909304790198803,
        0.012176262214779854,
        0.027806337922811508,
        0.006518288049846888,
        0.0274546779692173,
        -0.02107454277575016,
        0.013011457398533821,
        -0.015083746053278446,
        -0.010719381272792816,
        -0.0002598209830466658,
        -0.00823263544589281,
        0.0055763390846550465,
        -0.012339534237980843,
        -0.011881118640303612,
        0.015196779742836952,
        -0.010141652077436447,
        0.03129782900214195,
        -0.022104406729340553,
        -0.014568813145160675,
        0.009928143583238125,
        0.011730406433343887,
        0.0025102945510298014,
        -0.009739753790199757,
        -0.006964143831282854,
        0.030192609876394272,
        -0.0021272350568324327,
        0.0114603815600276,
        9.394961671205238e-05,
        0.014154355973005295,
        0.06611227244138718,
        0.007610949221998453,
        -0.005463304929435253,
        0.008088203147053719,
        0.009827669709920883,
        0.028986915946006775,
        -0.019077610224485397,
        -0.022267676889896393,
        0.012559321708977222,
        -0.01662854291498661,
        -0.002763050841167569,
        0.009205983020365238,
        0.012659796513617039,
        -0.0024051100481301546,
        0.0022543983068317175,
        0.0017849936848506331,
        0.014568813145160675,
        -0.004851038102060556,
        0.010857533663511276,
        0.019504627212882042,
        -0.005343991331756115,
        0.0071588135324418545,
        0.02151411771774292,
        0.011962753720581532,
        -0.002935741562396288,
        0.004521355964243412,
        0.0005718416068702936,
        0.017168592661619186,
        -0.0016280021518468857,
        0.0038494321051985025,
        -0.022443508729338646,
        0.02381247468292713,
        -0.021313169971108437,
        0.01400364376604557,
        -0.008929678238928318,
        -0.012050669640302658,
        -0.0018368008313700557,
        -0.007994008250534534,
        -0.018047746270895004,
        -0.014744644053280354,
        0.013965966179966927,
        0.028107762336730957,
        0.01681693270802498,
        -0.0033219405449926853,
        -0.006160347256809473,
        0.01966789737343788,
        0.010926609858870506,
        0.013237525708973408,
        -0.014066440984606743,
        -0.01764584705233574,
        0.02349849045276642,
        -0.019780931994318962,
        -0.030067017301917076,
        -0.016276881098747253,
        0.010430516675114632,
        0.011479220353066921,
        0.030192609876394272,
        0.0033250804990530014,
        -0.012370931915938854,
        -0.01980605162680149,
        0.02883620373904705,
        0.0005384809337556362,
        -0.011548296548426151,
        0.003821173682808876,
        0.027303965762257576,
        0.0004238771216478199,
        0.014933033846318722,
        -0.0028038686141371727,
        0.0003752097545657307,
        0.00652456795796752,
        0.010279805399477482,
        0.006656440440565348,
        -0.00809448305517435,
        0.033156611025333405,
        0.02444044128060341,
        -0.030142372474074364,
        0.006317338906228542,
        0.004094338975846767,
        -0.04071732237935066,
        0.02121269516646862,
        0.008471262641251087,
        -0.032001152634620667,
        0.0013022447237744927,
        0.016013136133551598,
        0.026374576613307,
        -0.014694406650960445,
        0.023159390315413475,
        0.0020424597896635532,
        0.013664542697370052,
        0.0057458896189928055,
        0.0041539957746863365,
        0.005303173791617155,
        0.0003883577883243561,
        0.004342385567724705,
        -0.01690484769642353,
        0.001676669460721314,
        0.020697763189673424,
        0.005133622791618109,
        0.00046273251064121723,
        -0.005651694722473621,
        0.009808829985558987,
        0.00202676048502326,
        0.009896745905280113,
        -0.017884474247694016,
        0.00862825382500887,
        0.020308423787355423,
        -0.007994008250534534,
        0.005127343349158764,
        -0.0029388812836259604,
        -0.0006303209811449051,
        0.011818322353065014,
        -0.018399406224489212,
        -0.012025550939142704,
        0.008678491227328777,
        0.00016974708705674857,
        -0.013677101582288742,
        0.01983116939663887,
        -0.007918652147054672,
        -0.03245328739285469,
        0.021162457764148712,
        -0.008044245652854443,
        -0.0002005566784646362,
        0.0036076651886105537,
        -0.029187863692641258,
        -0.03569359332323074,
        -0.028961796313524246,
        0.0027096737176179886,
        0.016364796087145805,
        -0.0071022966876626015,
        -0.005174440797418356,
        -0.01400364376604557,
        -0.023787355050444603,
        -0.024566033855080605,
        0.011648771353065968,
        0.005579478573054075,
        -0.029012033715844154,
        0.010857533663511276,
        -0.0049420930445194244,
        -0.025093525648117065,
        -0.0024961652234196663,
        -0.0030016780365258455,
        0.010694262571632862,
        -0.010681703686714172,
        -0.015724271535873413,
        -0.003249724628403783,
        -0.01731930486857891,
        0.006876228842884302,
        -0.009601601399481297,
        -0.009821389801800251,
        0.012176262214779854,
        0.02582196518778801,
        -0.010185610502958298,
        -0.0005722341011278331,
        0.02747979573905468,
        -0.019793491810560226,
        -0.0016405613860115409,
        0.017005322501063347,
        0.0072027710266411304,
        -0.02220488153398037,
        0.0006103045307099819,
        -0.007912373170256615,
        -0.004559034015983343,
        -0.0010808866936713457,
        0.0038023346569389105,
        0.01679181307554245,
        0.01718115247786045,
        0.018713390454649925,
        0.004100618418306112,
        0.017859356477856636,
        -0.026022914797067642,
        -0.016013136133551598,
        -0.028811084106564522,
        -0.0023705719504505396,
        -0.030343322083353996,
        0.003858851734548807,
        0.013890610076487064,
        0.011969033628702164,
        -0.002329754177480936,
        -0.0014223431935533881,
        0.01764584705233574,
        0.016276881098747253,
        0.02692718617618084,
        0.0021821821574121714,
        -0.020923830568790436,
        -0.02855989895761013,
        -0.007510474417358637,
        -0.002681415295228362,
        0.008326830342411995,
        -0.01458137296140194,
        -0.0007727907504886389,
        0.01720627024769783,
        0.032604001462459564,
        0.004417741671204567,
        0.03554287925362587,
        0.003953046631067991,
        -0.0322021022439003,
        -0.02505584806203842,
        0.009231101721525192,
        0.012659796513617039,
        0.009563923813402653,
        -0.016842050477862358,
        -0.005513542331755161,
        0.0041414364241063595,
        -0.014405542053282261,
        0.02916274592280388,
        0.012163703329861164,
        -0.008502661250531673,
        0.010185610502958298,
        0.028710609301924706,
        -0.0177839994430542,
        0.00950112659484148,
        0.028384067118167877,
        0.014744644053280354,
        -0.009444610215723515,
        0.005055127199739218,
        0.02803240716457367,
        -0.0009089809027500451,
        0.0004933458403684199,
        -0.014405542053282261,
        -0.008433585055172443,
        0.0012111896649003029,
        -0.031498778611421585,
        0.0004556679050438106,
        -0.021388525143265724,
        0.019605102017521858,
        0.03569359332323074,
        0.013689660467207432,
        0.01161109283566475,
        -0.0064680506475269794,
        -0.005783567670732737,
        0.009205983020365238,
        -0.013413355685770512,
        -0.001551076304167509,
        -0.013488711789250374,
        0.003160239430144429,
        -0.005114783998578787,
        -0.012408610433340073,
        -0.003158669453114271,
        0.0335836261510849,
        -0.026274101808667183,
        0.009319016709923744,
        0.0005424057017080486,
        -0.0011326938401907682,
        -0.008841762319207191,
        -0.005205838941037655,
        0.0030566249042749405,
        -0.013036576099693775,
        -0.022443508729338646,
        -0.0017865635454654694,
        0.011830881237983704,
        0.007096016779541969,
        -0.01605081371963024,
        -0.024038542062044144,
        -0.005039427895098925,
        -0.0011122849537059665,
        -0.0032779830507934093,
        -0.008571737445890903,
        0.0041194576770067215,
        0.014292508363723755,
        -0.02342313528060913,
        0.015485644340515137,
        -0.002646877197548747,
        0.020722880959510803,
        0.00680087273940444,
        -0.009865347295999527,
        -0.00600649556145072,
        0.021036865189671516,
        0.020371221005916595,
        -0.0016107329865917563,
        -0.019291117787361145,
        0.013287762179970741,
        -0.004006423521786928,
        -0.00762350857257843,
        -0.015334932133555412,
        0.0008681631297804415,
        -0.008295431733131409,
        -0.0007543442770838737,
        0.011523177847266197,
        -0.0027818898670375347,
        0.0037332584615796804,
        0.01304913591593504,
        -0.005237237084656954,
        -0.00340043636970222,
        0.016691338270902634,
        0.02612338960170746,
        -0.002295216079801321,
        -0.008333110250532627,
        -0.009683237411081791,
        -0.022305356338620186,
        -0.00935041531920433,
        0.06972935795783997,
        -0.001001605880446732,
        -0.0031319810077548027,
        0.019617659971117973,
        -0.019014813005924225,
        -0.012848186306655407,
        -0.04051637277007103,
        -0.018223576247692108,
        -0.01665366068482399,
        -0.0031979172490537167,
        -0.004436580464243889,
        0.004458559211343527,
        -0.013802695088088512,
        0.01994420401751995,
        0.013551508076488972,
        0.009080389514565468,
        0.00340043636970222,
        -0.027052778750658035,
        -0.011341067962348461,
        0.0048667374067008495,
        -0.0151088647544384,
        0.010110254399478436,
        0.00046077012666501105,
        0.009990940801799297,
        0.0205470509827137,
        0.008307991549372673,
        0.00277247023768723,
        0.01720627024769783,
        -0.007460237015038729,
        -0.0038965295534580946,
        0.008483821526169777,
        -0.00610383041203022,
        0.01745745725929737,
        0.008778966031968594,
        0.013312880881130695,
        0.014078999869525433,
        0.0029043431859463453,
        0.03780356049537659,
        0.008464982733130455,
        -0.005384809337556362,
        0.011868558824062347,
        0.005287474486976862,
        -0.006179186515510082,
        -0.006009635515511036,
        0.0142673896625638,
        0.0023344638757407665,
        -0.0057710083201527596,
        0.028660371899604797,
        -0.0014239131705835462,
        -0.028685491532087326,
        0.023121710866689682,
        0.007742822170257568,
        -0.009218541905283928,
        -0.023071475327014923,
        -0.01187483873218298,
        0.018072864040732384,
        0.00027355772908777,
        -0.013928287662565708,
        -0.010844974778592587,
        -0.017796559259295464,
        -0.014744644053280354,
        -0.01513398252427578,
        -0.014694406650960445,
        -0.001071467180736363,
        -0.031172236427664757,
        -0.019190644845366478,
        -0.02369944006204605,
        0.002376851625740528,
        -0.007466516923159361,
        0.001561280689202249,
        -0.0048981355503201485,
        -0.03863247483968735,
        -0.0125404829159379,
        -0.0036516229156404734,
        0.01909017004072666,
        0.0004517431079875678,
        -0.001898027490824461,
        -0.006486889906227589,
        0.004044101573526859,
        0.013802695088088512,
        -0.006047313567250967,
        -0.029012033715844154,
        0.011271991766989231,
        -0.006292220205068588,
        0.015611236914992332,
        0.0031727987807244062,
        0.00666272034868598,
        0.009111788123846054,
        -0.01690484769642353,
        0.029589762911200523,
        0.008521500043570995,
        0.015071186237037182,
        0.026299219578504562,
        0.0003149642434436828,
        0.018650593236088753,
        0.004537055268883705,
        0.0050582666881382465,
        0.001783423707820475,
        -0.019906524568796158,
        -1.6802998288767412e-05,
        -0.008307991549372673,
        -0.01179320365190506,
        -0.0020785678643733263,
        0.004810220096260309,
        -0.0034695127978920937,
        0.01676669530570507,
        0.01690484769642353,
        0.01568659394979477,
        -0.015372609719634056,
        -0.0016327118501067162,
        0.034211594611406326,
        -0.024088779464364052,
        0.016251763328909874,
        -0.0064806099981069565,
        0.0001965337578440085,
        0.013438474386930466,
        0.021564355120062828,
        0.03504050895571709,
        -0.004694046452641487,
        -0.03443766012787819,
        -0.005557499825954437,
        -0.025281915441155434,
        0.008490101434290409,
        0.0157996267080307,
        0.009130626916885376,
        0.0012418029364198446,
        -0.0018619195325300097,
        -0.022192321717739105,
        -0.006907626986503601,
        -0.0032748430967330933,
        -0.005548080429434776,
        0.019542304798960686,
        -0.006304779555648565,
        -0.0023250444792211056,
        -0.015309813432395458,
        -0.006499449256807566,
        -0.018324051052331924,
        0.005968817975372076,
        -0.006851110141724348,
        -0.017859356477856636,
        -0.02425205148756504,
        -0.0063236188143491745,
        -0.006568525452166796,
        -0.013890610076487064,
        -0.04551498219370842,
        -0.044786542654037476,
        -0.016163846477866173,
        0.00394362723454833,
        -0.017444897443056107,
        0.016364796087145805,
        -0.003949906677007675,
        -0.000322813808452338,
        -0.019981881603598595,
        -0.010625186376273632,
        -0.006342457607388496,
        -0.01692996546626091,
        -0.0006311059114523232,
        -0.006116389762610197,
        0.027002541348338127,
        0.027931932359933853,
        0.025369830429553986,
        0.013978525064885616,
        0.0114603815600276,
        0.006474330555647612,
        -0.008559177629649639,
        -0.01662854291498661,
        0.004559034015983343,
        -0.012094627134501934,
        -0.014204593375325203,
        0.012710033915936947,
        0.020999185740947723,
        0.028459424152970314,
        -0.007026940584182739,
        0.009595322422683239,
        0.0048447586596012115,
        -0.008188677951693535,
        -0.020873593166470528,
        0.0010173050686717033,
        -0.013928287662565708,
        -0.014405542053282261,
        -0.020974067971110344,
        -0.0010471334680914879,
        0.002153923735022545,
        0.010958008468151093,
        0.011234313249588013,
        -0.015548440627753735,
        0.011812042444944382,
        0.008596856147050858,
        0.02079823799431324,
        0.003050345228984952,
        0.026299219578504562,
        -0.01969301700592041,
        0.02742955833673477,
        0.007265567779541016,
        0.020597288385033607,
        -0.013162169605493546,
        0.007711423560976982,
        -0.006794593296945095,
        0.016917407512664795,
        0.009262500330805779,
        0.021451322361826897,
        0.02770586498081684,
        -0.012270457111299038,
        -0.010593787766993046,
        0.005884042475372553,
        0.026173627004027367,
        0.0031162817031145096,
        0.007510474417358637,
        0.004194813314825296,
        -0.007862135767936707,
        0.0027096737176179886,
        -0.020195389166474342,
        -0.004998610354959965,
        -0.013501270674169064,
        0.004712885711342096,
        -0.012201380915939808,
        -0.01872594840824604,
        0.018625473603606224,
        -0.011881118640303612,
        -0.029087388888001442,
        -0.015787066891789436,
        -0.0003167304093949497,
        0.048001728951931,
        0.005959398113191128,
        0.012100907042622566,
        0.01939159259200096,
        0.0028556757606565952,
        -0.005940559320151806,
        0.02068520337343216,
        -0.018675711005926132,
        -0.005522961728274822,
        0.039813049137592316,
        0.0015455815009772778,
        -0.0063236188143491745,
        -0.02416413463652134,
        0.004684627056121826,
        0.004078639671206474,
        -0.015071186237037182,
        -0.024051101878285408,
        0.008050525560975075,
        0.028760846704244614,
        -0.002221429953351617,
        -0.014619050547480583,
        0.006113249808549881,
        -0.033131491392850876,
        0.02471674606204033,
        0.003482071915641427,
        -0.021363407373428345,
        -0.015058627352118492,
        0.012440008111298084,
        -0.023071475327014923,
        0.0088919997215271,
        -0.0305191520601511,
        0.02720349095761776,
        0.03413623571395874,
        -0.005836945027112961,
        -0.006668999791145325,
        0.003796054981648922,
        0.00045880774268880486,
        -0.0013477721950039268,
        -0.007686304859817028,
        0.02833382971584797,
        -0.007058338727802038,
        0.0036641822662204504,
        0.01891433820128441,
        -0.0021272350568324327,
        -0.02079823799431324,
        -0.005208978895097971,
        -0.012320694513618946,
        0.022581661120057106,
        -0.017331864684820175,
        0.005482144188135862,
        -0.011899957433342934,
        0.011328508146107197,
        -0.003271703375503421,
        0.012075788341462612,
        -0.013790135271847248,
        -0.00652456795796752,
        -0.006210584659129381,
        -0.0016672499477863312,
        0.02665088139474392,
        2.0819775272684637e-06,
        -0.001816391944885254,
        -0.01485767774283886,
        -0.017984949052333832,
        -0.024503236636519432,
        -0.04712257534265518,
        -0.004483677912503481,
        0.0015801197150722146,
        -0.029589762911200523,
        -0.004659508354961872,
        -0.005466444883495569,
        0.009551363997161388,
        0.02565869502723217,
        0.016779253259301186,
        -0.012358373031020164,
        0.012553042732179165,
        0.023247305303812027,
        -0.0181733388453722,
        -0.0057301907800138,
        0.018688270822167397,
        0.015975456684827805,
        -0.02584708482027054,
        0.016251763328909874,
        -0.0035699873697012663,
        -0.019178085029125214,
        0.004577872809022665,
        -0.014104118570685387,
        -0.004160275217145681,
        0.011585974134504795,
        0.013388236984610558,
        0.00026119465474039316,
        -0.016113610938191414,
        0.006348737049847841,
        0.02936369553208351,
        0.016867170110344887,
        0.009036432020366192,
        -0.007322084624320269,
        -0.003296822076663375,
        0.037050001323223114,
        -0.014217152260243893,
        0.0041539957746863365,
        -0.007334643974900246,
        -0.012697474099695683,
        0.019655339419841766,
        -0.007052059285342693,
        -0.007667466066777706,
        -0.0007700434071011841,
        -0.03576894849538803,
        -0.003962466027587652,
        -0.0019985020626336336,
        0.008138440549373627,
        0.014342745766043663,
        -0.015837304294109344,
        -0.03210162743926048,
        -0.0056108771823346615,
        -0.016703898087143898,
        0.00034538135514594615,
        0.009984660893678665,
        -0.01095172856003046,
        0.001340707647614181,
        0.013840372674167156,
        0.016716457903385162,
        0.00038973146001808345,
        0.005940559320151806,
        -0.011535737663507462,
        -0.01093916967511177,
        -0.016314558684825897,
        -0.02427716925740242,
        -0.012634677812457085,
        -0.0012151143746450543,
        0.041244812309741974,
        0.006957864388823509,
        -0.018110541626811028,
        -0.0088919997215271,
        -0.005127343349158764,
        -0.02747979573905468,
        -0.03227745741605759,
        -0.002741072094067931,
        -0.0029655699618160725,
        -0.0056234365329146385,
        0.016741575673222542,
        -0.008527779951691628,
        0.022016491740942,
        0.006263962015509605,
        -0.0022748070769011974,
        -0.00584008451551199,
        -0.019630219787359238,
        0.011824601329863071,
        0.004703465849161148,
        0.01513398252427578,
        -0.000919970334507525,
        -0.02328498288989067,
        0.002163343131542206,
        0.0026296081487089396,
        0.011912517249584198,
        -0.0042073726654052734,
        0.00936297420412302,
        -0.009331576526165009,
        -0.010487033985555172,
        -0.014355304650962353,
        -0.0040284027345478535,
        -0.002763050841167569,
        0.007253008428961039,
        0.016364796087145805,
        -0.0157996267080307,
        0.001935705542564392,
        0.0050582666881382465,
        -0.003422415116801858,
        0.003422415116801858,
        -0.00468148710206151,
        0.008433585055172443,
        -0.015360050834715366,
        0.015837304294109344,
        -0.019981881603598595,
        -0.006320478860288858,
        0.003088023280724883,
        -0.02336033806204796,
        0.015159101225435734,
        -0.025420067831873894,
        0.00652456795796752,
        0.0294390507042408,
        0.01068798266351223,
        0.00756699126213789,
        -0.0008603135356679559,
        -0.0018933177925646305,
        0.0024160996545106173,
        0.005488423630595207,
        -0.00935041531920433,
        -0.011315949261188507,
        -2.2285437808022834e-05,
        -0.007372322026640177,
        -0.021149897947907448,
        -0.02966511808335781,
        0.0021617733873426914,
        -0.025721492245793343,
        -0.014330185949802399,
        0.001420773332938552,
        0.03476420417428017,
        0.015787066891789436,
        -0.026550406590104103,
        -0.01319984719157219,
        -0.023159390315413475,
        0.013011457398533821,
        0.009406931698322296,
        -0.013388236984610558,
        0.001562850666232407,
        -0.00962672010064125,
        -0.0042324913665652275,
        0.021916016936302185,
        -0.01762072928249836,
        0.005290614441037178,
        0.018537558615207672,
        0.016176406294107437,
        -0.004251330625265837,
        0.039260439574718475,
        0.2337038666009903,
        -0.012791668996214867,
        0.0035228899214416742,
        0.027354203164577484,
        0.02217976190149784,
        0.013551508076488972,
        0.010210729204118252,
        0.0017080678371712565,
        0.006982983089983463,
        0.0028289873152971268,
        -0.01159853395074606,
        0.0012826207093894482,
        -0.00470974575728178,
        0.006756915245205164,
        0.005450745578855276,
        0.00941949151456356,
        -0.02107454277575016,
        -0.019793491810560226,
        -0.030921051278710365,
        -0.024503236636519432,
        0.006279660854488611,
        -0.03295566141605377,
        -0.025256795808672905,
        -0.014305067248642445,
        0.017495134845376015,
        -0.0072969659231603146,
        -0.008446143940091133,
        0.0011868559522554278,
        0.03363386541604996,
        0.009746033698320389,
        0.004763122648000717,
        -0.011573415249586105,
        0.011228034272789955,
        0.011246873065829277,
        -0.014367864467203617,
        -0.019567424431443214,
        0.03604525327682495,
        0.013363118283450603,
        0.03084569424390793,
        0.003871411085128784,
        -0.016553185880184174,
        -0.025068406015634537,
        -0.004518216010183096,
        -0.017633287236094475,
        0.0027238030452281237,
        0.011096160858869553,
        0.007246728986501694,
        -0.026173627004027367,
        0.018123101443052292,
        0.033985525369644165,
        0.004656368400901556,
        0.007014381233602762,
        0.02300867810845375,
        0.044032983481884,
        0.00041406514355912805,
        -0.015950338914990425,
        0.008653372526168823,
        0.016666220501065254,
        -0.005259216297417879,
        -0.000553002639207989,
        0.0005902881384827197,
        0.03185044229030609,
        -2.5854542400338687e-05,
        0.03943626955151558,
        -0.023598965257406235,
        0.007472796365618706,
        -0.02519400045275688,
        -0.002959290286526084,
        0.003968745935708284,
        -0.0035166102461516857,
        -0.01667878031730652,
        -0.0014144936576485634,
        0.00603475421667099,
        -0.00840846635401249,
        -0.029514405876398087,
        -0.009519966319203377,
        0.010499592870473862,
        0.0036798813380301,
        0.03335756063461304,
        0.0274546779692173,
        -0.009388092905282974,
        -0.0015463664894923568,
        -0.009859067387878895,
        -0.02010747417807579,
        -0.038657594472169876,
        -0.029037151485681534,
        0.0022512583527714014,
        -0.00729068648070097,
        -0.012107186019420624,
        -0.023372897878289223,
        -0.014531135559082031,
        -0.010254686698317528,
        0.001805402571335435,
        -0.006069292314350605,
        0.023686882108449936,
        0.026198744773864746,
        -0.0034569534473121166,
        0.010612627491354942,
        -0.02091127075254917,
        0.012364652007818222,
        -0.005510402377694845,
        -0.00246790680103004,
        7.820140308467671e-05,
        0.0090866694226861,
        0.0022292796056717634,
        0.009903025813400745,
        -0.0071148560382425785,
        0.0007563066901639104,
        -0.00300324778072536,
        -0.01563635654747486,
        -0.022280236706137657,
        -0.0016735296230763197,
        -0.006807152647525072,
        -0.01108988095074892,
        -0.00680087273940444,
        0.00976487249135971,
        -0.005080245900899172,
        -0.00265472661703825,
        -0.0003416528052184731,
        -0.008885719813406467,
        -0.003959326073527336,
        -0.013915728777647018,
        -0.014794881455600262,
        0.002414529677480459,
        -0.006169767118990421,
        -0.014807440340518951,
        -0.006888788193464279,
        0.00915574561804533,
        0.014531135559082031,
        -0.030016779899597168,
        0.00854661874473095,
        -0.0002739502233453095,
        0.021237812936306,
        0.008113321848213673,
        -0.006989262532442808,
        0.007334643974900246,
        0.016992762684822083,
        -0.0028666651342064142,
        -0.016879728063941002,
        0.0035448686685413122,
        -0.0021131059620529413,
        -0.010003499686717987,
        0.0088919997215271,
        -0.0073534827679395676,
        -0.014920474030077457,
        -0.01649039052426815,
        0.03112199902534485,
        -0.027555152773857117,
        -0.010970567353069782,
        0.002863525412976742,
        -0.04272681474685669,
        -0.018185898661613464,
        -0.013903168961405754,
        -0.0035385889932513237,
        0.027931932359933853,
        -0.020735440775752068,
        -0.019680457189679146,
        -0.02485489845275879,
        -0.006210584659129381,
        0.009896745905280113,
        -0.012502805329859257,
        0.009645558893680573,
        0.02996654249727726,
        -0.007862135767936707,
        -0.038657594472169876,
        -0.013149609789252281,
        -0.16045789420604706,
        0.014430660754442215,
        0.011523177847266197,
        -0.012760271318256855,
        0.02066008374094963,
        0.015171661041676998,
        0.024641389027237892,
        0.004643809515982866,
        -0.009067830629646778,
        0.018575238063931465,
        0.009859067387878895,
        -0.01288586389273405,
        -0.014945592731237411,
        -0.014305067248642445,
        0.005290614441037178,
        -0.01612616889178753,
        -0.008182398043572903,
        0.012352093122899532,
        0.029740475118160248,
        0.028685491532087326,
        0.035115864127874374,
        -0.007334643974900246,
        -0.0009168304968625307,
        -0.00477882195264101,
        0.013714779168367386,
        -0.007811898365616798,
        0.0007111715967766941,
        0.03393528610467911,
        -0.010053737089037895,
        -0.007020661141723394,
        -0.016000576317310333,
        -0.02888644114136696,
        0.02582196518778801,
        0.004559034015983343,
        0.021966254338622093,
        -0.006430373061448336,
        0.012446288019418716,
        -0.042601220309734344,
        -0.01635223627090454,
        0.024930253624916077,
        0.032855186611413956,
        0.018625473603606224,
        0.019228322431445122,
        -0.013526389375329018,
        -0.015410288237035275,
        -0.005224677734076977,
        0.020609848201274872,
        0.015083746053278446,
        0.01621408388018608,
        -0.0070897373370826244,
        0.008119601756334305,
        -0.003028366481885314,
        -0.0005110073834657669,
        -0.00622942391782999,
        0.019981881603598595,
        0.029489288106560707,
        0.006888788193464279,
        0.02452835626900196,
        0.0024773261975497007,
        -0.008458703756332397,
        -0.0002564848982729018,
        -0.012370931915938854,
        -0.006254542153328657,
        -0.004706605803221464,
        0.006050453521311283,
        -0.005224677734076977,
        0.0014686556532979012,
        0.009689517319202423,
        -0.00024942029267549515,
        0.018349168822169304,
        -0.008910838514566422,
        -0.023084033280611038,
        -0.01039911899715662,
        -0.02554566040635109,
        0.006825991440564394,
        0.008389626629650593,
        -0.03378457576036453,
        0.03109688125550747,
        -0.0114603815600276,
        -0.014832559041678905,
        -0.008295431733131409,
        0.021313169971108437,
        -0.0024836058728396893,
        0.007070898078382015,
        -0.023121710866689682,
        0.00038973146001808345,
        0.0032465846743434668,
        0.012653516605496407,
        -0.03134806826710701,
        -0.027002541348338127,
        0.007453957572579384,
        -0.017984949052333832,
        -0.014166914857923985,
        -0.018022626638412476,
        0.00543504673987627,
        0.015598678030073643,
        0.012534203007817268,
        0.01623920351266861,
        -0.01571171171963215,
        -0.019893966615200043,
        -0.002260677982121706,
        0.017143474891781807,
        -0.016013136133551598,
        0.011190355755388737,
        0.03049403429031372,
        -0.005469584837555885,
        0.017532814294099808,
        0.007981449365615845,
        0.022405831143260002,
        -0.03169972822070122,
        -0.03217698261141777,
        0.006631322205066681,
        0.014694406650960445,
        0.026148507371544838,
        0.01623920351266861,
        0.026449931785464287,
        -0.013262644410133362,
        -0.01734442450106144,
        0.0048321993090212345,
        -0.013099372386932373,
        0.052045829594135284,
        0.006223144009709358,
        -0.04420881345868111,
        -0.0010887362295761704,
        0.0028792244847863913,
        0.0002376459160586819,
        -0.08409722149372101,
        -0.03438742458820343,
        0.004712885711342096,
        0.014405542053282261,
        -0.001269276486709714,
        0.03835617005825043,
        0.004216792527586222,
        -0.0017881334060803056,
        0.0019875126890838146,
        0.005535521078854799,
        0.008923398330807686,
        -0.03049403429031372,
        -0.01787191443145275,
        -0.010568669065833092,
        0.04071732237935066,
        -0.02720349095761776,
        0.012779110111296177,
        0.0010651875054463744,
        -0.033960405737161636,
        0.011604813858866692,
        -0.008678491227328777,
        0.007868414744734764,
        0.0028478263411670923,
        -0.001828951295465231,
        -0.02222999930381775,
        0.012201380915939808,
        -0.022971000522375107,
        0.03127271309494972,
        0.0016358516877517104,
        -0.004926394205540419,
        -0.016754135489463806,
        -0.024063661694526672,
        -0.014166914857923985,
        -0.008307991549372673,
        -0.004449139814823866,
        0.013626864179968834,
        -0.04099362716078758,
        0.03102152608335018,
        0.013413355685770512,
        -0.04106898233294487,
        0.027253728359937668,
        0.010593787766993046,
        -0.006461771205067635,
        -0.05581362545490265,
        -0.012659796513617039,
        0.017168592661619186,
        -0.0019137266790494323,
        0.03777844086289406,
        0.01870083063840866,
        -0.009048991836607456,
        -0.03222722187638283,
        -0.03139830380678177,
        -0.021916016936302185,
        0.010832414962351322,
        0.01048075407743454,
        -0.0017535953084006906,
        0.008056805469095707,
        0.028911558911204338,
        -0.009921864606440067,
        0.011962753720581532,
        0.013375678099691868,
        0.006147787906229496,
        -0.02038377895951271,
        0.01704300008714199,
        -0.012559321708977222,
        0.018575238063931465,
        -0.012697474099695683,
        0.0232221856713295,
        0.0201577115803957,
        0.00969579629600048,
        -0.01563635654747486,
        0.028409186750650406,
        0.006245122756808996,
        0.010053737089037895,
        -0.03792915120720863,
        -0.005469584837555885,
        -0.015837304294109344,
        -0.020258186385035515,
        0.006606203503906727,
        -0.005811826325953007,
        -0.03310637176036835,
        -0.020735440775752068,
        0.009218541905283928,
        -0.008182398043572903,
        0.026022914797067642,
        0.01360174547880888,
        -0.01054983027279377,
        0.0057710083201527596,
        -0.015347491018474102,
        -0.03544240817427635,
        -0.03054427169263363,
        0.01035516057163477,
        0.006989262532442808,
        -0.011510618962347507,
        0.007529313676059246,
        0.014757202938199043,
        -0.01180576253682375,
        -0.0010895212180912495,
        0.01607593148946762,
        0.010807296261191368,
        -0.02485489845275879,
        -0.011824601329863071,
        -0.0650070533156395,
        0.032352812588214874,
        -0.016113610938191414,
        -0.019353915005922318,
        0.0009600031771697104,
        -0.022016491740942,
        0.006687839049845934,
        -0.01512142363935709,
        0.002095836913213134,
        -0.00881036464124918,
        -0.018675711005926132,
        -0.0048635974526405334,
        -0.002117815660312772,
        0.012615839019417763,
        -0.02798216976225376,
        0.0032685634214431047,
        0.020182831212878227,
        0.014192033559083939,
        0.005880902521312237,
        -0.002144504338502884,
        0.00037756460369564593,
        -0.03245328739285469,
        -0.016502948477864265,
        0.001036928966641426,
        -0.023774797096848488,
        0.011177796870470047,
        -0.0314234234392643,
        0.0005577123956754804,
        -0.002411389723420143,
        -0.00874128844588995,
        0.0064366525039076805,
        -0.042576100677251816,
        0.0064021144062280655,
        0.022682135924696922,
        0.0008108612382784486,
        -0.010122813284397125,
        0.006794593296945095,
        0.015083746053278446,
        0.007366042118519545,
        0.020333541557192802,
        -0.030895931646227837,
        -0.033181726932525635,
        0.015611236914992332,
        -0.010832414962351322,
        0.0036013855133205652,
        0.004634389653801918,
        -0.005340851843357086,
        -0.0033941566944122314,
        0.030418677255511284,
        0.009808829985558987,
        -0.003227745648473501,
        0.029263220727443695,
        -0.028409186750650406,
        -0.013036576099693775,
        -0.012841906398534775,
        -0.008351949043571949,
        -0.017143474891781807,
        -0.0003757984668482095,
        -0.008333110250532627,
        -0.04493725299835205,
        0.02248118631541729,
        0.00018220828496851027,
        0.02966511808335781,
        -0.007868414744734764,
        0.005246656946837902,
        0.010813576169312,
        -0.0205470509827137,
        -0.002692404668778181,
        -0.014619050547480583,
        -0.02234303392469883,
        -0.006920186337083578,
        -0.00593741936609149,
        0.00517130084335804,
        0.005312593188136816,
        0.019630219787359238,
        0.020647525787353516,
        -0.003180648200213909,
        0.0170932374894619,
        -0.040616847574710846,
        0.037728201597929,
        0.020258186385035515,
        0.021112220361828804,
        -0.030318202450871468,
        -0.0004332966054789722,
        0.034563254565000534,
        0.006088131107389927,
        -0.006254542153328657,
        -0.01731930486857891,
        -0.009205983020365238,
        0.00310058263130486,
        -0.03607037290930748,
        -0.0266759991645813,
        0.006939025595784187,
        0.024289729073643684,
        -0.0038368727546185255,
        0.00473800441250205,
        0.017520254477858543,
        0.00783073715865612,
        0.017658406868577003,
        0.023372897878289223,
        0.0023721419274806976,
        0.009438330307602882,
        -0.016000576317310333,
        -0.004552754107862711,
        -0.008785245940089226,
        0.005924860015511513,
        -0.006289080251008272,
        -0.037878915667533875,
        -0.007328364532440901,
        0.012898423708975315,
        0.011812042444944382,
        0.029790712520480156,
        -0.006361296400427818,
        0.015046067535877228,
        -0.02499305084347725,
        -0.00584008451551199,
        0.02609827183187008,
        -0.0014945593429729342,
        -0.028434304520487785,
        0.04242539033293724,
        -0.008973635733127594,
        0.0035794067662209272,
        0.02770586498081684,
        -0.015071186237037182,
        0.008433585055172443,
        0.002970279660075903,
        -0.007793059106916189,
        -0.0035228899214416742,
        0.01905249059200287,
        -0.0050017498433589935,
        0.011717847548425198,
        -0.010041178204119205,
        -0.016452711075544357,
        -0.015410288237035275,
        0.0009136906592175364,
        0.01372733898460865,
        -0.008351949043571949,
        0.024339966475963593,
        -0.0161889661103487,
        0.05631599947810173,
        -0.012722592800855637,
        -0.0041634151712059975,
        0.02287052571773529,
        0.014292508363723755,
        0.020735440775752068,
        0.024427881464362144,
        0.018072864040732384,
        -0.008559177629649639,
        -0.02024562656879425,
        -0.0038494321051985025,
        0.006756915245205164,
        -0.0008289152756333351,
        -0.02259422093629837,
        -0.011127559468150139,
        0.004289008677005768,
        -0.01591266132891178,
        0.004398902412503958,
        -0.019630219787359238,
        0.01292354241013527,
        0.018324051052331924,
        0.008942237123847008,
        0.0243902038782835,
        0.013212407007813454,
        -0.018072864040732384,
        -0.019931644201278687,
        0.00423877127468586,
        0.0012331684119999409,
        -0.018349168822169304,
        -0.016151288524270058,
        0.017243949696421623,
        0.0007649412145838141,
        -0.0012857605470344424,
        -0.01260955911129713,
        0.006656440440565348,
        0.002959290286526084,
        -0.007108576130121946,
        -0.009513686411082745,
        -0.0036422032862901688,
        0.010562390089035034,
        0.02289564348757267,
        0.0006240413058549166,
        -0.018097983673214912,
        -0.03295566141605377,
        0.006813432089984417,
        0.002750491490587592,
        -0.02936369553208351,
        -0.005384809337556362,
        -0.02317194826900959
      ]
    },
    {
      "object": "embedding",
      "index": 1,
      "embedding": [
        0.01023657713085413,
        -0.006719755474478006,
        0.0031741075217723846,
        -0.007405183278024197,
        -0.010435158386826515,
        0.005877383518964052,
        -0.011242298409342766,
        0.0028522126376628876,
        0.00522398529574275,
        -0.016347775235772133,
        0.03156811743974686,
        0.029928214848041534,
        0.005617945920675993,
        0.005086258985102177,
        0.02043471857905388,
        0.0074948654510080814,
        0.02780146524310112,
        -0.012901417911052704,
        0.013311393558979034,
        0.0038499266374856234,
        -0.0013556417543441057,
        0.02084469422698021,
        0.011761173605918884,
        -0.014605378732085228,
        -0.002282090485095978,
        -0.002938691759482026,
        0.004013276193290949,
        -0.02040909416973591,
        0.01891012117266655,
        -0.025367235764861107,
        0.01896136812865734,
        -0.010870757512748241,
        -0.010473594069480896,
        -0.005979877430945635,
        -0.004096552263945341,
        -0.022228362038731575,
        -0.011773984879255295,
        -0.0177955012768507,
        0.02324048802256584,
        -0.006566014606505632,
        0.006367432419210672,
        0.0005869376473128796,
        -0.0010113263269886374,
        0.0065083615481853485,
        -0.038076478987932205,
        0.01811579428613186,
        0.023471100255846977,
        -0.009845818392932415,
        -0.016091540455818176,
        0.02248459681868553,
        0.02122904546558857,
        0.0023685696069151163,
        -0.017590513452887535,
        -0.013811050914227962,
        -0.030312566086649895,
        0.014323520474135876,
        -0.03871706500649452,
        0.029825720936059952,
        0.02003755420446396,
        -0.0033887040335685015,
        0.019935060292482376,
        -0.012920635752379894,
        -0.019140733405947685,
        0.019896624609827995,
        -0.012786111794412136,
        0.002304510911926627,
        0.005758875049650669,
        -6.095583376009017e-05,
        -0.03625721111893654,
        0.0196532029658556,
        0.013977603055536747,
        0.011741955764591694,
        0.006931148935109377,
        -0.009788165800273418,
        0.012030219659209251,
        0.003657750552520156,
        -0.016104351729154587,
        0.0059318337589502335,
        0.006143227219581604,
        -0.0033887040335685015,
        0.021126551553606987,
        -0.046916574239730835,
        -0.0228177011013031,
        0.01429789699614048,
        0.03195246681571007,
        -0.004461687058210373,
        -0.0117803905159235,
        0.007040048483759165,
        -0.01507941260933876,
        -0.011613838374614716,
        0.014835990034043789,
        0.017910806462168694,
        0.013759803958237171,
        0.021100929006934166,
        -0.01820547692477703,
        0.003545647719874978,
        -0.00044400669867172837,
        0.032977405935525894,
        -0.003929999656975269,
        -0.031439997255802155,
        -0.005480220075696707,
        -0.02357359416782856,
        0.00333745707757771,
        -0.006777408067137003,
        0.000521677837241441,
        -0.0029210757929831743,
        0.00032910145819187164,
        -0.003926796838641167,
        0.026443421840667725,
        -0.010403129272162914,
        -0.03277241811156273,
        0.02312518283724785,
        0.010614522732794285,
        -0.027596479281783104,
        -0.01773144118487835,
        -0.009339755401015282,
        0.014067285694181919,
        0.004765965510159731,
        -0.013823863118886948,
        -0.027724595740437508,
        0.0022884963545948267,
        0.020972810685634613,
        0.020575648173689842,
        -0.03743589296936989,
        0.016129974275827408,
        -0.004756357055157423,
        -0.01506660133600235,
        -0.028365181758999825,
        -0.010576087981462479,
        -0.0010553667088970542,
        0.018295157700777054,
        0.0022580684162676334,
        0.024034816771745682,
        -0.009512714110314846,
        -0.012882200069725513,
        0.015604693442583084,
        0.004615427926182747,
        0.01621965691447258,
        -0.004429657477885485,
        -0.00846855714917183,
        0.014054473489522934,
        0.03046630695462227,
        0.009807383641600609,
        -0.018666699528694153,
        -0.005236797034740448,
        0.013964791782200336,
        0.002373374067246914,
        -0.0050446209497749805,
        0.007213007193058729,
        -0.0037378238048404455,
        0.01932009682059288,
        0.0057748896069824696,
        0.01694992557168007,
        -0.014361955225467682,
        0.0234967228025198,
        0.019806943833827972,
        -0.005627554841339588,
        0.011914914473891258,
        -0.004134987480938435,
        -0.01775706559419632,
        0.0029723227489739656,
        -0.008686356246471405,
        0.005156723782420158,
        -0.018038922920823097,
        0.022868948057293892,
        0.014938483946025372,
        0.01589936390519142,
        -0.0007162560941651464,
        -0.009980342350900173,
        -0.030389437451958656,
        -0.008948997594416142,
        0.015207529999315739,
        -0.03758963197469711,
        0.020575648173689842,
        -0.022177115082740784,
        0.035334765911102295,
        0.005444987677037716,
        0.012677212245762348,
        -0.004330366384238005,
        -0.022279608994722366,
        -0.04430298134684563,
        0.01099887490272522,
        0.004112567286938429,
        0.03018444962799549,
        0.008673544973134995,
        -0.0011778789339587092,
        0.014618190005421638,
        -0.005390537902712822,
        0.01775706559419632,
        -0.009474278427660465,
        0.006604449823498726,
        0.02739149145781994,
        0.008058581501245499,
        0.004231075756251812,
        -0.6772795915603638,
        -0.003121259156614542,
        0.025303177535533905,
        0.0057332515716552734,
        0.010127676650881767,
        0.0010986062698066235,
        0.02974884957075119,
        0.021049682050943375,
        0.01465662568807602,
        0.015105036087334156,
        -0.022894570603966713,
        0.01137682143598795,
        -0.030261319130659103,
        -0.01026860624551773,
        -0.005838948301970959,
        -0.006393055897206068,
        -0.004516136832535267,
        -0.022676771506667137,
        0.0039940583519637585,
        0.006681320257484913,
        -0.006175256334245205,
        0.037333399057388306,
        0.002186002442613244,
        -0.0066300733014941216,
        0.01861545257270336,
        -0.004410440102219582,
        -0.002250061137601733,
        -0.02284332364797592,
        -0.00948709063231945,
        0.0037602444645017385,
        -0.001710366690531373,
        0.009800978004932404,
        -0.0129590705037117,
        0.006847872864454985,
        0.07159198075532913,
        -0.0021379583049565554,
        -0.00130119186360389,
        -0.0110373105853796,
        -0.0034111246932297945,
        0.028032077476382256,
        -0.013631686568260193,
        -0.019140733405947685,
        0.012433789670467377,
        -0.0001967802527360618,
        -0.01329858135432005,
        -0.007482053712010384,
        0.011953349225223064,
        -0.003997261635959148,
        -0.010377505794167519,
        -0.018846062943339348,
        0.004202249459922314,
        -0.0018769192975014448,
        0.009980342350900173,
        0.02435510978102684,
        -0.014592566527426243,
        0.018231099471449852,
        0.03131188079714775,
        -0.0053296820260584354,
        -0.0018192664720118046,
        -0.002783349482342601,
        0.009397407993674278,
        0.009301319718360901,
        -0.000698239600751549,
        0.014361955225467682,
        -0.014925671741366386,
        0.017500830814242363,
        -0.01782112382352352,
        0.032285574823617935,
        0.014631002210080624,
        -0.014823177829384804,
        -0.006114400923252106,
        0.001224321429617703,
        -0.008840097114443779,
        -0.004362395964562893,
        0.014413202181458473,
        0.028775157406926155,
        0.00983941275626421,
        -0.01080669928342104,
        -0.009262884967029095,
        0.00967926625162363,
        0.015655940398573875,
        0.0043399753049016,
        0.004109364002943039,
        -0.0020130439661443233,
        0.03015882521867752,
        -0.023868262767791748,
        -0.023868262767791748,
        -0.009358973242342472,
        -0.006130415480583906,
        0.006937554571777582,
        0.002025855705142021,
        0.011870073154568672,
        -0.010403129272162914,
        -0.00846855714917183,
        0.023471100255846977,
        0.003901173360645771,
        -0.023855451494455338,
        -0.004756357055157423,
        0.025636283680796623,
        -0.005633960478007793,
        0.008474962785840034,
        -0.004141393583267927,
        0.011005280539393425,
        -0.012555500492453575,
        -0.013798239640891552,
        0.006847872864454985,
        -0.022971441969275475,
        0.02202337421476841,
        0.029646355658769608,
        -0.031132517382502556,
        0.005563496146351099,
        0.0035136183723807335,
        -0.014272273518145084,
        0.005041418131440878,
        -0.0041157701052725315,
        -0.022881759330630302,
        0.0003765449218917638,
        0.0010201344266533852,
        0.006389853078871965,
        -0.01926884986460209,
        0.01627090387046337,
        -0.0010417542653158307,
        0.008398092351853848,
        0.0011298349127173424,
        0.005438581574708223,
        -0.003050794592127204,
        0.014605378732085228,
        0.010678581893444061,
        -0.042304351925849915,
        0.005249608773738146,
        -0.003196527948603034,
        0.004705110099166632,
        0.002853814046829939,
        -0.010242982767522335,
        -0.01078748144209385,
        -0.0075909532606601715,
        0.01176757924258709,
        -0.014182590879499912,
        -0.0024118092842400074,
        0.021510904654860497,
        0.005288043990731239,
        0.001864107558503747,
        -0.017564889043569565,
        -0.011741955764591694,
        0.025200683623552322,
        -0.01003799494355917,
        0.002890647854655981,
        0.003169303061440587,
        -0.007981711067259312,
        0.004801197908818722,
        0.028057700023055077,
        0.004791588988155127,
        8.422715472988784e-05,
        0.016091540455818176,
        -0.00481400964781642,
        -0.0185385812073946,
        -0.00987784843891859,
        -0.019409779459238052,
        -0.04791589081287384,
        -0.007853593677282333,
        0.02980009652674198,
        0.01744958385825157,
        -0.0005477016675285995,
        -0.005957457236945629,
        -0.011953349225223064,
        -0.0197172611951828,
        -0.007450024131685495,
        0.020216919481754303,
        -0.007385965436697006,
        -0.024239802733063698,
        -4.3239608203293756e-05,
        -0.018333593383431435,
        -0.011581809259951115,
        0.00608877744525671,
        -0.012440195307135582,
        0.015655940398573875,
        -0.0077446941286325455,
        -0.020614081993699074,
        -0.021395597606897354,
        -0.015271589159965515,
        -0.0005056631634943187,
        -0.011223080568015575,
        -0.018769193440675735,
        0.011613838374614716,
        0.022984253242611885,
        -0.005845354404300451,
        0.002275684615597129,
        0.03618033975362778,
        -0.02429104968905449,
        0.004833227023482323,
        0.0247394610196352,
        0.03738464415073395,
        -0.0323624424636364,
        -0.01097965706139803,
        0.008744009770452976,
        -0.018602639436721802,
        0.004000464454293251,
        0.005938239395618439,
        0.02201056107878685,
        0.011114181019365788,
        0.005358508322387934,
        -0.013048752211034298,
        0.01488723699003458,
        0.005803716368973255,
        0.004477701615542173,
        -0.01932009682059288,
        0.0016703300643712282,
        -0.030363813042640686,
        -0.002752921776846051,
        0.017705818638205528,
        -0.0026488262228667736,
        0.015361270867288113,
        -0.0025447309017181396,
        0.027647726237773895,
        0.008378875441849232,
        0.04028009623289108,
        0.007136136759072542,
        -0.01488723699003458,
        -0.01627090387046337,
        -0.013144840486347675,
        -0.0019249633187428117,
        0.016450269147753716,
        -0.012183960527181625,
        0.008564645424485207,
        -0.006649290677160025,
        0.028032077476382256,
        0.004836430307477713,
        0.03659031540155411,
        0.01620684564113617,
        -0.03505290672183037,
        -0.0251750610768795,
        -0.01411853265017271,
        0.012280048802495003,
        -0.0006562010967172682,
        -0.0055795107036828995,
        -0.0033822981640696526,
        0.01121667493134737,
        -0.008628703653812408,
        0.04125379025936127,
        0.0010465586092323065,
        -0.018423276022076607,
        0.007616576738655567,
        0.037640880793333054,
        0.004807603545486927,
        -0.016885867342352867,
        0.02052440121769905,
        0.005595525726675987,
        0.007027236744761467,
        -0.003356674686074257,
        0.022689582780003548,
        -0.025034131482243538,
        0.009557554498314857,
        -0.016155598685145378,
        0.00549943745136261,
        0.006598043721169233,
        -0.013311393558979034,
        0.005717237014323473,
        -0.014938483946025372,
        0.020280977711081505,
        0.0401776023209095,
        -0.000625372864305973,
        -0.008955403231084347,
        0.0024742663372308016,
        0.005147114861756563,
        -0.014092909172177315,
        -0.011024498380720615,
        0.012267236597836018,
        -0.016091540455818176,
        0.008833691477775574,
        0.009435843676328659,
        -0.019601956009864807,
        -0.0034463568590581417,
        0.014682249166071415,
        -0.009756136685609818,
        0.026520293205976486,
        0.010768263600766659,
        0.024893201887607574,
        -0.007014425005763769,
        -0.01701398566365242,
        -0.0014581356663256884,
        0.0028249877505004406,
        -0.015553447417914867,
        -0.006124009378254414,
        0.02238210290670395,
        -0.000762698648031801,
        -0.03464293107390404,
        -0.01215833704918623,
        -0.001716772560030222,
        -0.008692762814462185,
        0.008077799342572689,
        -0.01653994992375374,
        0.021754326298832893,
        -0.003926796838641167,
        0.0035264301113784313,
        -0.0023925916757434607,
        -0.014618190005421638,
        0.01852576993405819,
        -0.01816704124212265,
        -0.0038787529338151217,
        0.021895255893468857,
        0.006037530489265919,
        0.00013292176299728453,
        0.0070656719617545605,
        -0.016411833465099335,
        -0.00541936419904232,
        -0.0051439120434224606,
        0.0014685451751574874,
        -0.011351197957992554,
        0.003955623134970665,
        -0.02509818971157074,
        -0.01705241948366165,
        -0.023881075903773308,
        -0.010697798803448677,
        -0.014912860468029976,
        0.008526209741830826,
        -0.0033534718677401543,
        -0.013618875294923782,
        0.018448898568749428,
        0.03259305655956268,
        0.012446600943803787,
        -0.003862738376483321,
        -0.0021667848341166973,
        -0.009916283190250397,
        -0.027647726237773895,
        0.09865036606788635,
        0.006809437647461891,
        -0.007751100230962038,
        0.014682249166071415,
        -0.026827774941921234,
        -0.004753153771162033,
        -0.03133750334382057,
        -0.031004400923848152,
        -0.01702679693698883,
        0.006242518313229084,
        0.0016134779434651136,
        0.021036868914961815,
        -0.01583530567586422,
        0.011927725747227669,
        0.009724107570946217,
        0.011709926649928093,
        0.01588655263185501,
        -0.007321906741708517,
        -0.006399461999535561,
        -0.003433545120060444,
        -0.019102297723293304,
        0.016475891694426537,
        -0.017603324726223946,
        0.013849485665559769,
        -0.004676283337175846,
        0.0009240464423783123,
        -0.010825916193425655,
        0.007001613266766071,
        -0.008462151512503624,
        -0.007687041535973549,
        0.002632811665534973,
        0.005361711140722036,
        0.006835061125457287,
        0.010678581893444061,
        -0.01044156402349472,
        0.0174239594489336,
        0.011985378339886665,
        0.02315080538392067,
        0.011549779213964939,
        -0.016680879518389702,
        0.011223080568015575,
        0.002693667309358716,
        -0.004775574430823326,
        -0.02509818971157074,
        -0.009595990180969238,
        -0.005316870287060738,
        -0.02504694275557995,
        0.03051755391061306,
        -0.013003911823034286,
        -0.029441367834806442,
        0.025713153183460236,
        -0.0028185818810015917,
        0.010691393166780472,
        -0.02595657669007778,
        0.0003090831160079688,
        0.01659119687974453,
        -0.011639461852610111,
        -0.035001661628484726,
        -0.010902786627411842,
        -0.0024406355805695057,
        -0.010825916193425655,
        -0.029979461804032326,
        -0.01656557433307171,
        -0.009134767577052116,
        -0.013798239640891552,
        -0.034591685980558395,
        -0.03797398507595062,
        -0.012305672280490398,
        -0.0393064059317112,
        -0.013913544826209545,
        0.022561466321349144,
        -0.043534278869628906,
        -0.020357847213745117,
        -0.0037602444645017385,
        0.009653642773628235,
        0.012408166192471981,
        0.007373153697699308,
        -0.025034131482243538,
        0.007161760237067938,
        0.003913985099643469,
        -0.0014597370754927397,
        -0.023483911529183388,
        0.002860219916328788,
        -0.0022228360176086426,
        0.0009752933401614428,
        0.0060503422282636166,
        0.013798239640891552,
        0.004894082900136709,
        -0.034566063433885574,
        0.02857016958296299,
        0.020024742931127548,
        0.009346161037683487,
        0.019140733405947685,
        -0.0030187652446329594,
        0.014246650040149689,
        -0.015374083071947098,
        -0.003545647719874978,
        0.016142787411808968,
        -0.015156283043324947,
        0.00502540310844779,
        0.007988116703927517,
        -0.005896601360291243,
        -0.00850058626383543,
        0.006473129615187645,
        0.005573105067014694,
        0.013951979577541351,
        0.02664840966463089,
        0.022305231541395187,
        -0.0019217603839933872,
        -0.011120586656033993,
        0.020947188138961792,
        -0.02203618548810482,
        -0.009403813630342484,
        -0.008263569325208664,
        0.005002982914447784,
        0.02397075667977333,
        0.030825035646557808,
        0.03218308091163635,
        0.008071393705904484,
        -0.012645183131098747,
        -0.0004888477851636708,
        -0.023445475846529007,
        -0.001793642994016409,
        0.009320537559688091,
        -0.002629608614370227,
        0.0033534718677401543,
        -0.009749731048941612,
        -0.021446844562888145,
        -0.004551369231194258,
        -0.0021443641744554043,
        -0.014336331747472286,
        0.018436087295413017,
        -0.0060823713429272175,
        -0.00885931495577097,
        -0.026955891400575638,
        -0.00512469420209527,
        -0.014797554351389408,
        0.01046078186482191,
        -0.005541075486689806,
        -0.016527138650417328,
        -0.02511100098490715,
        -0.0049997796304523945,
        -0.012023814022541046,
        0.017231784760951996,
        -0.03443794697523117,
        -0.03218308091163635,
        -0.010684987530112267,
        0.01390073262155056,
        -0.003420733381062746,
        0.01892293430864811,
        -0.009000244550406933,
        -0.0045737894251942635,
        -0.017577700316905975,
        0.010729828849434853,
        -0.004916503559798002,
        -0.024790707975625992,
        -0.020767822861671448,
        -0.019115108996629715,
        0.017193349078297615,
        0.027160879224538803,
        0.029236380010843277,
        0.007879217155277729,
        0.022599902004003525,
        0.018026111647486687,
        -0.005970268975943327,
        -0.0005601130542345345,
        -0.005742860492318869,
        -0.009813789278268814,
        -0.018666699528694153,
        0.012837358750402927,
        0.008180293254554272,
        0.010922004468739033,
        0.007981711067259312,
        0.0018689119024202228,
        0.004576992709189653,
        0.005787701345980167,
        -0.006642885040491819,
        -0.013670122250914574,
        -0.00983941275626421,
        -0.01294625923037529,
        -0.013285770080983639,
        -0.0135163813829422,
        -0.0028377994894981384,
        0.00687349634245038,
        0.001427707727998495,
        -0.014387578703463078,
        0.021523715928196907,
        0.0060599506832659245,
        -0.009615207090973854,
        -0.003865941194817424,
        0.01852576993405819,
        -0.020652517676353455,
        0.022497408092021942,
        -0.009423031471669674,
        0.013644498772919178,
        -0.020934375002980232,
        0.007200195454061031,
        0.013580439612269402,
        0.011658679693937302,
        0.005355305504053831,
        -0.010114865377545357,
        0.024508850648999214,
        -0.01703960821032524,
        -0.008410904556512833,
        0.005451393313705921,
        0.01741114817559719,
        -0.0021139364689588547,
        -0.014195403084158897,
        0.008955403231084347,
        0.00019858189625665545,
        0.012395353987812996,
        -0.02900576964020729,
        -0.023663274943828583,
        -0.0270327627658844,
        -0.007962493225932121,
        -0.009205232374370098,
        -0.01616840995848179,
        0.013606063090264797,
        -0.013926356099545956,
        -0.03935765102505684,
        -0.018666699528694153,
        -0.005313667468726635,
        0.025777211412787437,
        0.0016703300643712282,
        0.015156283043324947,
        0.0024758679792284966,
        -0.02739149145781994,
        -0.0009432640508748591,
        0.01311921700835228,
        -0.013670122250914574,
        -0.00221963319927454,
        0.039229534566402435,
        -0.002186002442613244,
        -0.004865256603807211,
        -0.002150770043954253,
        -0.0204603411257267,
        0.021113740280270576,
        -0.010736234486103058,
        -0.03448919206857681,
        0.011402444913983345,
        0.007072078064084053,
        0.009768947958946228,
        -0.009032273665070534,
        -0.0029226772021502256,
        -0.013542004860937595,
        0.03413046523928642,
        -0.006847872864454985,
        -0.027519607916474342,
        -0.021895255893468857,
        0.0026216013357043266,
        -0.008052175864577293,
        0.025572223588824272,
        -0.035795990377664566,
        0.036487821489572525,
        0.03525789454579353,
        -0.022715207189321518,
        0.003015562193468213,
        0.001989021897315979,
        0.014579755254089832,
        -0.004801197908818722,
        -0.011056527495384216,
        0.03623158857226372,
        0.0025671515613794327,
        -0.021100929006934166,
        0.004980562254786491,
        -0.018410464748740196,
        -0.001675134408287704,
        0.017859559506177902,
        0.00018567006918601692,
        0.018282346427440643,
        -0.007155354134738445,
        -0.0038371148984879255,
        -0.01964038982987404,
        0.00597347179427743,
        0.01121026836335659,
        -0.010889975354075432,
        -0.0077126650139689445,
        -0.02195931412279606,
        -0.0038339118473231792,
        -0.010185330174863338,
        0.021818386390805244,
        0.0065307822078466415,
        0.0014765525702387094,
        -0.009576772339642048,
        -0.016040293499827385,
        -0.025738777592778206,
        -0.015591882169246674,
        -0.0012203177902847528,
        0.015450953505933285,
        -0.01466943696141243,
        -0.011434474028646946,
        -0.0034047188237309456,
        -0.002813777420669794,
        0.031516868621110916,
        0.004307946190237999,
        -0.00024062041484285146,
        -0.0018000488635152578,
        0.016399022191762924,
        -0.02008880116045475,
        0.0038211001083254814,
        0.02317642979323864,
        0.027135256677865982,
        -0.020947188138961792,
        0.0015998654998838902,
        -0.00026924663688987494,
        -0.016399022191762924,
        0.007289877627044916,
        -0.009128361940383911,
        0.004077334888279438,
        0.005662787239998579,
        0.00022040188196115196,
        -0.005749266128987074,
        0.006681320257484913,
        -0.004484107252210379,
        -0.005912615917623043,
        0.0028281905688345432,
        0.010076429694890976,
        -0.024662591516971588,
        -0.006681320257484913,
        0.030235696583986282,
        -0.011050121858716011,
        0.008827285841107368,
        -0.0037025916390120983,
        -0.0015502199530601501,
        -0.012965476140379906,
        0.00152139354031533,
        -0.026955891400575638,
        0.00638024415820837,
        -0.027980830520391464,
        0.007610171101987362,
        0.008692762814462185,
        0.014541319571435452,
        0.009685671888291836,
        -0.036077845841646194,
        -0.0355910025537014,
        0.01060811709612608,
        0.005262420512735844,
        0.01213911920785904,
        0.017705818638205528,
        0.001870513428002596,
        -0.002735305577516556,
        0.02274082973599434,
        0.007981711067259312,
        0.0035200242418795824,
        0.01664244383573532,
        0.0029915403574705124,
        0.011223080568015575,
        -0.009743324480950832,
        -0.026084693148732185,
        -0.01694992557168007,
        0.007552518043667078,
        0.03935765102505684,
        -0.00034671759931370616,
        -0.0142594613134861,
        -0.006598043721169233,
        0.010793887078762054,
        -0.014400390908122063,
        0.005746063310652971,
        -0.014041662216186523,
        0.014169779606163502,
        0.007213007193058729,
        0.009147578850388527,
        -0.002192408312112093,
        0.02549535408616066,
        0.015463764779269695,
        0.007597359362989664,
        -0.004432860296219587,
        -0.01140885055065155,
        0.005822933744639158,
        0.012324889190495014,
        0.0270327627658844,
        -0.0009344559512101114,
        -0.03131188079714775,
        0.0033694864250719547,
        -0.0012227200204506516,
        0.011114181019365788,
        -0.005787701345980167,
        0.004224669653922319,
        0.009890659712255001,
        -0.01046078186482191,
        -0.0008039363892748952,
        -0.011447285301983356,
        0.001793642994016409,
        0.01627090387046337,
        -0.0035200242418795824,
        -0.005569902248680592,
        0.010166112333536148,
        0.009704889729619026,
        -0.0030636063311249018,
        0.016514327377080917,
        -0.003913985099643469,
        0.021741515025496483,
        -0.02705838531255722,
        0.004576992709189653,
        -0.014797554351389408,
        -0.0013019925681874156,
        0.004862053785473108,
        -0.026046257466077805,
        0.00040076710865832865,
        -0.006790219806134701,
        0.0010249388869851828,
        0.031875599175691605,
        -0.007347530219703913,
        -0.0044648898765444756,
        0.01933290809392929,
        -0.016770562157034874,
        -0.008186698891222477,
        0.0021379583049565554,
        -0.005749266128987074,
        -0.021036868914961815,
        0.0017135696252807975,
        -0.007847188040614128,
        -0.01194694358855486,
        -0.02903139218688011,
        0.0028169802390038967,
        -0.024137310683727264,
        -0.012818141840398312,
        -0.02092156372964382,
        0.01975569687783718,
        0.026161564514040947,
        0.0018448899500072002,
        -0.0010946026304736733,
        -0.023727335035800934,
        0.016104351729154587,
        0.013452322222292423,
        -0.019960684701800346,
        0.007398777175694704,
        -0.02933887392282486,
        -0.016001857817173004,
        0.013388263992965221,
        -0.01579686999320984,
        -0.007635794579982758,
        0.00889775063842535,
        0.008731197565793991,
        0.017590513452887535,
        0.038076478987932205,
        0.22610150277614594,
        -0.024124497547745705,
        0.0023061123210936785,
        0.022279608994722366,
        0.013465134426951408,
        0.02438073232769966,
        -0.009045084938406944,
        -0.0006485941121354699,
        0.0080906106159091,
        0.008039363659918308,
        -0.025790024548768997,
        0.01277970615774393,
        -0.001265959581360221,
        -0.000538092921487987,
        0.002045073313638568,
        -0.017859559506177902,
        -0.04202249273657799,
        -0.01431070826947689,
        -0.02090875245630741,
        -0.026571540161967278,
        0.0007498869090341032,
        -0.014861613512039185,
        -0.011652273125946522,
        -0.006566014606505632,
        0.02472664974629879,
        -0.007501271087676287,
        -0.008103422820568085,
        0.00027305010007694364,
        0.021024057641625404,
        0.007373153697699308,
        0.0004956540069542825,
        -0.019140733405947685,
        0.0003104844072367996,
        0.013093593530356884,
        0.004218264017254114,
        -0.020255353301763535,
        0.014631002210080624,
        -0.004054914228618145,
        0.019384155049920082,
        0.02545691840350628,
        -0.01894855685532093,
        -0.0017231784295290709,
        -0.006325794383883476,
        -0.014733496122062206,
        0.01698836125433445,
        0.010345476679503918,
        -0.0015934596303850412,
        -0.01577124558389187,
        -0.006975989788770676,
        0.02739149145781994,
        -0.01701398566365242,
        0.00041117664659395814,
        0.017193349078297615,
        0.04294493794441223,
        0.0035264301113784313,
        0.013132029213011265,
        -0.010095647536218166,
        0.002938691759482026,
        -0.014682249166071415,
        -0.0053713200613856316,
        -0.014605378732085228,
        0.02775021828711033,
        0.0006537988665513694,
        0.010492810979485512,
        -0.022535841912031174,
        0.023368606343865395,
        -0.033361759036779404,
        -0.005384131800383329,
        0.03331051394343376,
        0.0019073471194133162,
        -0.004026087932288647,
        -0.004006870090961456,
        -0.012312077917158604,
        0.018320782110095024,
        -0.034566063433885574,
        -0.005374522879719734,
        -0.00016395017155446112,
        0.00687349634245038,
        0.03292616084218025,
        0.012862982228398323,
        -0.013721369206905365,
        0.010415940545499325,
        0.014195403084158897,
        -0.011953349225223064,
        -0.0247394610196352,
        -0.03605222329497337,
        0.022177115082740784,
        -0.005835745483636856,
        -0.007040048483759165,
        -0.006034327670931816,
        0.005159926600754261,
        -0.031183764338493347,
        -0.017090855166316032,
        0.006377041339874268,
        0.020345035940408707,
        0.040305718779563904,
        0.004743545316159725,
        0.008993837982416153,
        -0.014541319571435452,
        -0.005217579193413258,
        0.005903006996959448,
        -0.03448919206857681,
        0.0002622402098495513,
        -0.0002678453456610441,
        -0.007219412829726934,
        -0.0060503422282636166,
        -0.00964083056896925,
        0.01178679708391428,
        -0.005573105067014694,
        -0.031158139929175377,
        -0.010781075805425644,
        -0.014067285694181919,
        -0.003763447282835841,
        -0.007456430234014988,
        -0.002764131873846054,
        0.011882884427905083,
        -0.0002940693811979145,
        0.008705574087798595,
        0.005621149204671383,
        -0.016770562157034874,
        0.020947188138961792,
        -0.013401075266301632,
        -0.0018721148371696472,
        -0.0043463814072310925,
        -0.011223080568015575,
        -0.011107774451375008,
        -0.007943276315927505,
        0.004211857914924622,
        0.01619403436779976,
        -0.032285574823617935,
        0.001259553711861372,
        -0.008237945847213268,
        0.027980830520391464,
        0.000949669920373708,
        -0.016706503927707672,
        0.002653630683198571,
        0.012638777494430542,
        -0.01657838560640812,
        -0.007257848046720028,
        0.018730757758021355,
        -0.002205220051109791,
        -0.0010481601348146796,
        0.009314131923019886,
        -0.0022596698254346848,
        -0.015989046543836594,
        0.004990171175450087,
        0.023048311471939087,
        -0.010294229723513126,
        -0.01657838560640812,
        -0.012081466615200043,
        -0.03520664945244789,
        -0.022663960233330727,
        -0.03374611213803291,
        0.0006313783233053982,
        0.04133065789937973,
        -0.014566943049430847,
        -0.017577700316905975,
        -0.029569486156105995,
        -0.011242298409342766,
        0.016527138650417328,
        -0.031106894835829735,
        0.00647953525185585,
        0.019486648961901665,
        -0.015040977858006954,
        -0.03899892419576645,
        -0.011107774451375008,
        -0.16378523409366608,
        0.02549535408616066,
        0.00891056191176176,
        -0.005118288565427065,
        0.03548850864171982,
        0.013798239640891552,
        0.012440195307135582,
        0.0008255562279373407,
        -0.010121271014213562,
        0.001718374085612595,
        0.031414374709129333,
        0.0060055009089410305,
        -0.008609486743807793,
        -0.0006441900623030961,
        0.017116479575634003,
        -0.018064547330141068,
        -0.009019461460411549,
        0.010281417518854141,
        0.02862141653895378,
        0.023778581991791725,
        0.0017423960380256176,
        -0.002471063518896699,
        0.006655696779489517,
        -0.002250061137601733,
        0.01470787264406681,
        -0.006040733307600021,
        0.006373838521540165,
        0.041894376277923584,
        -0.016437456011772156,
        0.007635794579982758,
        -0.03400234505534172,
        -0.03679530322551727,
        0.03172185644507408,
        0.017680194228887558,
        0.019973495975136757,
        0.005326479207724333,
        -0.010531246662139893,
        -0.02708400972187519,
        -0.014323520474135876,
        0.002940293401479721,
        0.028390806168317795,
        0.03287491202354431,
        0.0009328544838353992,
        0.02003755420446396,
        -0.015348459593951702,
        -0.0016350977821275592,
        0.003641735762357712,
        0.012440195307135582,
        0.005115085281431675,
        -0.007366748061031103,
        0.016283715143799782,
        -0.004560977686196566,
        -0.015912175178527832,
        -0.0129590705037117,
        0.002512701554223895,
        0.020614081993699074,
        0.01374699268490076,
        0.016821809113025665,
        -0.005512249190360308,
        0.003782664891332388,
        -0.02204899676144123,
        -0.015732811763882637,
        -0.0023989975452423096,
        0.00907070841640234,
        0.009391002357006073,
        -0.0027048776391893625,
        0.007347530219703913,
        0.017257407307624817,
        -0.0012227200204506516,
        0.013087187893688679,
        0.009211638011038303,
        -0.019922249019145966,
        0.02316361851990223,
        -0.0023861858062446117,
        0.00347518315538764,
        0.010121271014213562,
        -0.018692322075366974,
        0.01848733425140381,
        0.00313407089561224,
        -0.023676088079810143,
        -0.020998435094952583,
        0.026776527985930443,
        -0.01964038982987404,
        0.005364914424717426,
        -0.00030127595528028905,
        0.003420733381062746,
        -0.003888361854478717,
        0.006751784589141607,
        -0.030773788690567017,
        -0.007936869747936726,
        0.02197212725877762,
        -0.030671294778585434,
        -0.011671490967273712,
        -0.008609486743807793,
        0.003657750552520156,
        0.02320205233991146,
        0.006905525457113981,
        0.005444987677037716,
        -0.017552077770233154,
        -0.013862297870218754,
        -0.0018016502726823092,
        -0.0028025670908391476,
        -0.009865036234259605,
        -0.01310640573501587,
        0.04432860389351845,
        -0.004295134451240301,
        0.02782708965241909,
        0.001064174808561802,
        0.017231784760951996,
        -0.01896136812865734,
        -0.020280977711081505,
        -0.00022720811830367893,
        0.024483226239681244,
        0.017500830814242363,
        0.013951979577541351,
        0.005438581574708223,
        0.0038339118473231792,
        -0.014105720445513725,
        0.00026384167722426355,
        0.0073411245830357075,
        0.04986327514052391,
        -0.011306356638669968,
        -0.028390806168317795,
        -0.002086711348965764,
        -0.005976674612611532,
        -0.01780831255018711,
        -0.08840097486972809,
        -0.026904644444584846,
        0.007655011955648661,
        0.01738552562892437,
        -0.007001613266766071,
        0.009352566674351692,
        0.0024342297110706568,
        0.0030043520964682102,
        -0.00830841064453125,
        0.012741271406412125,
        0.011844449676573277,
        -0.02623843401670456,
        -0.024572908878326416,
        -0.0005709229735657573,
        0.034232959151268005,
        -0.027929583564400673,
        0.01891012117266655,
        -0.020204106345772743,
        -0.012734864838421345,
        0.026776527985930443,
        -0.00251430319622159,
        0.001099407090805471,
        -0.008276381529867649,
        0.014131343923509121,
        -0.018231099471449852,
        -0.005486625712364912,
        -0.01698836125433445,
        0.03215745463967323,
        0.002780146664008498,
        -0.002128349617123604,
        -0.01506660133600235,
        -0.019063862040638924,
        0.0063546206802129745,
        0.0014044864801689982,
        0.01137682143598795,
        0.008571051061153412,
        -0.026776527985930443,
        0.013951979577541351,
        0.021498091518878937,
        -0.038563322275877,
        0.007808752823621035,
        0.004125379025936127,
        -0.02276645414531231,
        -0.05173378810286522,
        -0.008744009770452976,
        -0.0009520720923319459,
        -0.0035232272930443287,
        0.014771930873394012,
        0.0011250305688008666,
        -0.013990415260195732,
        -0.028698287904262543,
        -0.04373926669359207,
        -0.007853593677282333,
        0.007879217155277729,
        0.024457603693008423,
        0.013849485665559769,
        0.02507256716489792,
        0.022125868126749992,
        -0.005592322442680597,
        0.02816019393503666,
        0.012254425324499607,
        -0.00028085726080462337,
        -0.032669924199581146,
        0.022907383739948273,
        -0.00753970630466938,
        0.0050222002901136875,
        -0.015976233407855034,
        0.013836674392223358,
        0.018372029066085815,
        0.004708312917500734,
        -0.021088115870952606,
        0.03295178338885307,
        0.005685207433998585,
        0.015117848291993141,
        -0.03436107560992241,
        -0.009262884967029095,
        -0.023266112431883812,
        -0.0015718397917225957,
        0.008391686715185642,
        0.0009889057837426662,
        -0.011460097506642342,
        -0.0154381413012743,
        0.008583863265812397,
        0.013849485665559769,
        0.0004984565894119442,
        0.006226503290235996,
        0.005361711140722036,
        -0.0038851588033139706,
        -0.010512028820812702,
        -0.014746307395398617,
        -0.006809437647461891,
        0.011882884427905083,
        0.0064314911141991615,
        -0.02550816535949707,
        0.003427139250561595,
        0.02284332364797592,
        -0.014797554351389408,
        0.0044969189912080765,
        0.023663274943828583,
        0.0019041441846638918,
        -0.026020634919404984,
        0.005438581574708223,
        -0.06108635663986206,
        0.028723910450935364,
        -0.011876478791236877,
        -0.022689582780003548,
        0.010025182738900185,
        -0.02892889827489853,
        0.0033406601287424564,
        0.0023477505892515182,
        -0.0189741812646389,
        -0.0009905073093250394,
        0.00869916845113039,
        0.00667491415515542,
        0.0037089975085109472,
        0.004727530293166637,
        -0.03874268755316734,
        -0.001560629578307271,
        0.01935853250324726,
        0.0013035940937697887,
        0.01623246818780899,
        0.0004672279756050557,
        -7.51688567106612e-05,
        -0.01702679693698883,
        0.0073603419587016106,
        -0.004833227023482323,
        -0.028108946979045868,
        0.0002832594618666917,
        -0.03487354516983032,
        0.002471063518896699,
        0.007309095002710819,
        -0.031183764338493347,
        0.028108946979045868,
        -0.0479927621781826,
        -0.003910782281309366,
        0.01580968126654625,
        0.0031372737139463425,
        0.012209584005177021,
        -0.03090190701186657,
        0.011690708808600903,
        0.010108459740877151,
        0.013042346574366093,
        -0.0432780422270298,
        -0.031004400923848152,
        0.03333613649010658,
        -0.01580968126654625,
        0.01142166182398796,
        0.007001613266766071,
        -0.021164987236261368,
        0.004189437720924616,
        0.015963422134518623,
        0.020755011588335037,
        -0.0024470414500683546,
        0.03866581618785858,
        -0.022228362038731575,
        -0.001466943765990436,
        0.00762298284098506,
        -0.0011986979516223073,
        0.01666806824505329,
        -0.0010177321964874864,
        -0.008250758051872253,
        -0.023752957582473755,
        0.01616840995848179,
        0.004394425079226494,
        0.014105720445513725,
        0.008763226680457592,
        -0.0016431050607934594,
        0.008295598439872265,
        -0.03169623389840126,
        -0.017244596034288406,
        -0.0317474789917469,
        -0.025982199236750603,
        -0.005864571779966354,
        0.011581809259951115,
        0.02741711400449276,
        0.016155598685145378,
        0.02505975402891636,
        0.017897995188832283,
        -0.01272845920175314,
        0.024906013160943985,
        -0.05765280872583389,
        0.02357359416782856,
        0.014156967401504517,
        0.004826821386814117,
        -0.043047431856393814,
        0.008404498919844627,
        0.021498091518878937,
        0.001606271369382739,
        -0.0024998898152261972,
        -0.009192420169711113,
        -0.01060811709612608,
        0.004897285718470812,
        -0.04061320051550865,
        -0.002450244501233101,
        0.025264741852879524,
        0.013567628338932991,
        -0.022612713277339935,
        0.012241613119840622,
        0.014374767430126667,
        0.0047723716124892235,
        0.004509730730205774,
        0.010729828849434853,
        0.0045193396508693695,
        0.017231784760951996,
        -0.02195931412279606,
        -0.0007931265281513333,
        -0.008417310193181038,
        0.03487354516983032,
        -0.017218973487615585,
        -0.026904644444584846,
        -0.008737603202462196,
        0.01970444992184639,
        0.013990415260195732,
        0.03928077965974808,
        0.012446600943803787,
        0.010960440151393414,
        -0.016501516103744507,
        0.013426698744297028,
        0.0066621024161577225,
        0.0011090158950537443,
        -0.02980009652674198,
        0.057345326989889145,
        -0.0031372737139463425,
        0.01810298301279545,
        0.023355793207883835,
        0.007763911969959736,
        0.018743569031357765,
        -0.00771907065063715,
        -0.02933887392282486,
        -0.010422347113490105,
        0.02276645414531231,
        -0.014528508298099041,
        0.002581564709544182,
        -0.0050446209497749805,
        -0.02236928977072239,
        -0.007206601090729237,
        0.00023361398780252784,
        0.0018256723415106535,
        0.0009008251363411546,
        0.039178285747766495,
        -0.010781075805425644,
        0.045789141207933426,
        0.013260146602988243,
        -0.015271589159965515,
        0.01817985251545906,
        0.01529721263796091,
        0.01333701703697443,
        0.00445528095588088,
        0.02203618548810482,
        -0.017859559506177902,
        -0.009685671888291836,
        0.0341048389673233,
        -0.008628703653812408,
        0.010146894492208958,
        -0.028032077476382256,
        -0.006540391128510237,
        0.006248923949897289,
        -0.024790707975625992,
        0.005390537902712822,
        -0.010652958415448666,
        0.029876967892050743,
        0.03018444962799549,
        0.0038307090289890766,
        0.032669924199581146,
        0.017193349078297615,
        -0.028903275728225708,
        -0.006047139409929514,
        -0.0038243031594902277,
        0.0026344130747020245,
        -0.006245721131563187,
        -0.0285189226269722,
        0.015335647389292717,
        0.009608801454305649,
        -0.0038723470643162727,
        -0.005493031814694405,
        0.026033446192741394,
        -0.007661418057978153,
        -0.00520476745441556,
        -0.01388792134821415,
        -0.02043471857905388,
        0.002728899708017707,
        0.01976850815117359,
        -0.0064314911141991615,
        -0.025392860174179077,
        -0.02584127150475979,
        0.00866713933646679,
        0.0018817236414179206,
        -0.028493300080299377,
        0.001047359430231154,
        -0.020665328949689865
      ]
    }
  ],
  "model": "ada",
  "usage": {
    "prompt_tokens": 7,
    "total_tokens": 7
  }
}


================================================
FILE: libs/kotaemon/tests/resources/fullocr_sample_output.json
================================================
[{"csv_string": ",,,\u5358\u4f4d,\u5b9f\u65bd\u4f8b1,\u5b9f\u65bd\u4f8b2,\u5b9f\u65bd\u4f8b3,\u6bd4\u8f03\u4f8b1,\u6bd4\u8f03\u4f8b2,\u6bd4\u8f03\u4f8b3,\u6bd4\u8f03\u4f8b4\n\u71b1\u786c\u5316\u6027\u6a39\u8102\u7d44\u6210\u7269,\u71b1\u786c\u5316\u6027\u6a39\u8102,\u30a8\u30dd\u30ad\u30b7\u6a39\u81021,\u8cea\u91cf%,10.2,12.4,12.4,10.2,12.4,,10.2\n,,\u30a8\u30dd\u30ad\u30b7\u6a39\u81022,,-,-,-,-,-,14.4,-\n,\u786c\u5316\u5264,\u786c\u5316\u52641,,4.8,6.6,6.6,4.8,6.6,7.6,4.8\n,\u7121\u6a5f\u5145\u586b\u6750,\u7121\u6a5f\u5145\u586b\u52641,,74,70,70,74,70,67,74\n,,\u7121\u6a5f\u5145\u586b\u52642,,10,10,10,10,10,10,10\n,\u786c\u5316\u4fc3\u9032\u5264,\u786c\u5316\u4fc3\u9032\u52641,,0.2,0.2,0.2,0.2,0.2,0.2,0.2\n,\u30ab\u30c3\u30d7\u30ea\u30f3\u30b0\u5264,\u30ab\u30c3\u30d7\u30ea\u30f3\u30b0\u52641,,0.2,0.2,0.2,0.2,0.2,0.2,0.2\n,\u96e2\u578b\u5264,\u96e2\u578b\u52641,,0.2,0.2,0.2,0.2,0.2,0.2,0.2\n,\u7740\u8272\u5264,\u7740\u8272\u52641,,0.4,0.4,0.4,0.4,0.4,0.4,0.4\n,\u5408\u8a08,,,100,100,100,100,100,100,100\n,,,\u5358\u4f4d,\u5b9f\u65bd\u4f8b1,\u5b9f\u65bd\u4f8b2,\u5b9f\u65bd\u4f8b3,\u6bd4\u8f03\u4f8b1,\u6bd4\u8f03\u4f8b2,\u6bd4\u8f03\u4f8b3,\u6bd4\u8f03\u4f8b4\n\u71b1\u786c\u5316\u6027\u6a39\u8102\u7d44\u6210\u7269,\u71b1\u786c\u5316\u6027\u6a39\u8102,\u30a8\u30dd\u30ad\u30b7\u6a39\u81021,%,,10,10,(\u30bb\u30e9\u30df\u30c3\u30af),(\u30bb\u30e9\u30df\u30c3\u30af),,\n,,\u30a8\u30dd\u30ad\u30b7\u6a39\u81022,,5,-,-,,,5,5\n,,\u30a8\u30dd\u30ad\u30b7\u6a39\u81023,,5,-,-,,,5,5\n,,\u30b7\u30a2\u30cd\u30fc\u30c8\u6a39\u81021,,10,10,10,,,10,10\n,,\u30d5\u30a7\u30ce\u30fc\u30eb\u7cfb\u786c\u5316\u52641,,5,5,5,,,5,5\n,\u786c\u5316\u89e6\u5a92,\u786c\u5316\u89e6\u5a921,,0.2,0.2,0.2,,,0.2,0.2\n,\u7121\u6a5f\u5145\u586b\u6750,\u7121\u6a5f\u5145\u586b\u67501,,74.8,74.8,74.8,,,74.8,74.8\n,\u5408\u8a08,,,100,100,100,,,100,100\nIL1TD-L1TU,,,ppm,510,470,470,,,510,510\nL125D-L125U,,,ppm,390,330,330,,,390,390\n\u30ac\u30e9\u30b9\u8ee2\u79fb\u6e29\u5ea6 (Tg),,,\u00b0C,,224,224,,,235,235\n", 
"image": "7810d908b0ff4ce381dcab873196d133.jpg", "image_shape": [1653, 2339], "json": {"ocr": [{"location": [[237, 290], [352, 290], [352, 309], [237, 309]], "type": "textline", "text": "\u71b1\u786c\u5316\u6027\u6a39\u8102", "confidence_by_character": [0.9686674475669861, 0.9664099812507629, 0.9206223487854004, 0.9281898140907288, 0.962303102016449, 0.9605254530906677], "confidence_by_field": 0.9206223487854004, "original_text": "\u71b1\u786c\u5316\u6027\u6a39\u8102"}, {"location": [[694, 272], [739, 272], [739, 297], [694, 297]], "type": "textline", "text": "10.2", "confidence_by_character": [0.9169260263442993, 0.9255782961845398, 0.924151599407196, 0.9199540019035339], "confidence_by_field": 0.9169260263442993, "original_text": "10.2"}, {"location": [[767, 242], [843, 242], [843, 264], [767, 264]], "type": "textline", "text": "\u5b9f\u65bd\u4f8b2", "confidence_by_character": [0.9473932385444641, 0.9163241982460022, 0.7033942937850952, 0.8979774117469788], "confidence_by_field": 0.7033942937850952, "original_text": "\u5b9f\u65bd\u4f8b2"}, {"location": [[783, 272], [830, 272], [830, 297], [783, 297]], "type": "textline", "text": "12.4", "confidence_by_character": [0.9123356342315674, 0.9198176264762878, 0.9250789880752563, 0.92127925157547], "confidence_by_field": 0.9123356342315674, "original_text": "12.4"}, {"location": [[961, 268], [1011, 268], [1011, 298], [961, 298]], "type": "textline", "text": "10.2", "confidence_by_character": [0.917376697063446, 0.9228717684745789, 0.9257153868675232, 0.9199367761611938], "confidence_by_field": 0.917376697063446, "original_text": "10.2"}, {"location": [[1050, 268], [1100, 268], [1100, 298], [1050, 298]], "type": "textline", "text": "12.4", "confidence_by_character": [0.9123420119285583, 0.9206566214561462, 0.9267528057098389, 0.9224498867988586], "confidence_by_field": 0.9123420119285583, "original_text": "12.4"}, {"location": [[1130, 242], [1203, 242], [1203, 264], [1130, 264]], "type": "textline", "text": 
"\u6bd4\u8f03\u4f8b3", "confidence_by_character": [0.9575085639953613, 0.9481230974197388, 0.756417453289032, 0.9103184342384338], "confidence_by_field": 0.756417453289032, "original_text": "\u6bd4\u8f03\u4f8b3"}, {"location": [[236, 335], [297, 335], [297, 358], [236, 358]], "type": "textline", "text": "\u786c\u5316\u5264", "confidence_by_character": [0.9772048592567444, 0.9326395988464355, 0.9597039222717285], "confidence_by_field": 0.9326395988464355, "original_text": "\u786c\u5316\u5264"}, {"location": [[383, 335], [454, 335], [454, 358], [383, 358]], "type": "textline", "text": "\u786c\u5316\u52641", "confidence_by_character": [0.9785639047622681, 0.9359536170959473, 0.9465298056602478, 0.911139965057373], "confidence_by_field": 0.911139965057373, "original_text": "\u786c\u5316\u52641"}, {"location": [[385, 305], [510, 305], [510, 325], [385, 325]], "type": "textline", "text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81022", "confidence_by_character": [0.9240000247955322, 0.9118557572364807, 0.8709841966629028, 0.898737370967865, 0.9585683345794678, 0.9542934894561768, 0.9125517010688782], "confidence_by_field": 0.8709841966629028, "original_text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81022"}, {"location": [[385, 274], [509, 274], [509, 294], [385, 294]], "type": "textline", "text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81021", "confidence_by_character": [0.9230291247367859, 0.913360595703125, 0.8715535402297974, 0.9017773270606995, 0.9544979333877563, 0.9605312943458557, 0.9045484662055969], "confidence_by_field": 0.8715535402297974, "original_text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81021"}, {"location": [[601, 240], [644, 240], [644, 265], [601, 265]], "type": "textline", "text": "\u5358\u4f4d", "confidence_by_character": [0.9253715872764587, 0.9197276830673218], "confidence_by_field": 0.9197276830673218, "original_text": "\u5358\u4f4d"}, {"location": [[675, 242], [752, 242], [752, 264], [675, 264]], "type": "textline", "text": "\u5b9f\u65bd\u4f8b1", 
"confidence_by_character": [0.9467881917953491, 0.9383226037025452, 0.7930335402488708, 0.7423544526100159], "confidence_by_field": 0.7423544526100159, "original_text": "\u5b9f\u65bd\u4f8b1"}, {"location": [[856, 242], [933, 242], [933, 264], [856, 264]], "type": "textline", "text": "\u5b9f\u65bd\u4f8b3", "confidence_by_character": [0.9493170976638794, 0.9393699169158936, 0.7815940976142883, 0.8886772990226746], "confidence_by_field": 0.7815940976142883, "original_text": "\u5b9f\u65bd\u4f8b3"}, {"location": [[1232, 272], [1280, 272], [1280, 297], [1232, 297]], "type": "textline", "text": "10.2", "confidence_by_character": [0.9169086217880249, 0.9247543811798096, 0.9242917895317078, 0.9190678596496582], "confidence_by_field": 0.9169086217880249, "original_text": "10.2"}, {"location": [[709, 308], [722, 308], [722, 324], [709, 324]], "type": "textline", "text": "-", "confidence_by_character": [0.8763231635093689], "confidence_by_field": 0.8763231635093689, "original_text": "-"}, {"location": [[788, 334], [825, 334], [825, 359], [788, 359]], "type": "textline", "text": "6.6", "confidence_by_character": [0.9220069050788879, 0.9281726479530334, 0.9160889387130737], "confidence_by_field": 0.9160889387130737, "original_text": "6.6"}, {"location": [[799, 308], [812, 308], [812, 324], [799, 324]], "type": "textline", "text": "-", "confidence_by_character": [0.888385534286499], "confidence_by_field": 0.888385534286499, "original_text": "-"}, {"location": [[873, 273], [919, 273], [919, 297], [873, 297]], "type": "textline", "text": "12.4", "confidence_by_character": [0.912204921245575, 0.9198381900787354, 0.9228084087371826, 0.9204709529876709], "confidence_by_field": 0.912204921245575, "original_text": "12.4"}, {"location": [[949, 242], [1022, 242], [1022, 264], [949, 264]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b1", "confidence_by_character": [0.9624030590057373, 0.9464587569236755, 0.7610214948654175, 0.9083214998245239], "confidence_by_field": 0.7610214948654175, 
"original_text": "\u6bd4\u8f03\u4f8b1"}, {"location": [[1040, 242], [1114, 242], [1114, 264], [1040, 264]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b2", "confidence_by_character": [0.9574549794197083, 0.95237135887146, 0.7252858281135559, 0.9158275127410889], "confidence_by_field": 0.7252858281135559, "original_text": "\u6bd4\u8f03\u4f8b2"}, {"location": [[1219, 242], [1293, 242], [1293, 264], [1219, 264]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b4", "confidence_by_character": [0.9668106436729431, 0.9518352746963501, 0.792457640171051, 0.91578209400177], "confidence_by_field": 0.792457640171051, "original_text": "\u6bd4\u8f03\u4f8b4"}, {"location": [[886, 307], [904, 307], [904, 324], [886, 324]], "type": "textline", "text": "-", "confidence_by_character": [0.8876243233680725], "confidence_by_field": 0.8876243233680725, "original_text": "-"}, {"location": [[980, 310], [993, 310], [993, 324], [980, 324]], "type": "textline", "text": "-", "confidence_by_character": [0.8970376253128052], "confidence_by_field": 0.8970376253128052, "original_text": "-"}, {"location": [[1065, 306], [1087, 306], [1087, 328], [1065, 328]], "type": "textline", "text": "-", "confidence_by_character": [0.8992383480072021], "confidence_by_field": 0.8992383480072021, "original_text": "-"}, {"location": [[1249, 310], [1262, 310], [1262, 324], [1249, 324]], "type": "textline", "text": "-", "confidence_by_character": [0.8871487379074097], "confidence_by_field": 0.8871487379074097, "original_text": "-"}, {"location": [[237, 382], [333, 382], [333, 401], [237, 401]], "type": "textline", "text": "\u7121\u6a5f\u5145\u586b\u6750", "confidence_by_character": [0.952167809009552, 0.9192871451377869, 0.9678506255149841, 0.9592251181602478, 0.9547764658927917], "confidence_by_field": 0.9192871451377869, "original_text": "\u7121\u6a5f\u5145\u586b\u6750"}, {"location": [[384, 367], [490, 367], [490, 386], [384, 386]], "type": "textline", "text": "\u7121\u6a5f\u5145\u586b\u52641", 
"confidence_by_character": [0.9493359923362732, 0.9333418011665344, 0.9685581922531128, 0.9741933941841125, 0.9563856720924377, 0.9097828269004822], "confidence_by_field": 0.9097828269004822, "original_text": "\u7121\u6a5f\u5145\u586b\u52641"}, {"location": [[697, 334], [734, 334], [734, 358], [697, 358]], "type": "textline", "text": "4.8", "confidence_by_character": [0.9166890978813171, 0.9267215132713318, 0.9219436645507812], "confidence_by_field": 0.9166890978813171, "original_text": "4.8"}, {"location": [[880, 337], [914, 337], [914, 359], [880, 359]], "type": "textline", "text": "6.6", "confidence_by_character": [0.9212806820869446, 0.9280492067337036, 0.917919933795929], "confidence_by_field": 0.917919933795929, "original_text": "6.6"}, {"location": [[701, 396], [733, 396], [733, 421], [701, 421]], "type": "textline", "text": "10", "confidence_by_character": [0.9160857200622559, 0.9220792651176453], "confidence_by_field": 0.9160857200622559, "original_text": "10"}, {"location": [[701, 364], [733, 364], [733, 389], [701, 389]], "type": "textline", "text": "74", "confidence_by_character": [0.9179115295410156, 0.9187763333320618], "confidence_by_field": 0.9179115295410156, "original_text": "74"}, {"location": [[793, 365], [820, 365], [820, 388], [793, 388]], "type": "textline", "text": "70", "confidence_by_character": [0.9186102151870728, 0.9202508330345154], "confidence_by_field": 0.9186102151870728, "original_text": "70"}, {"location": [[883, 367], [910, 367], [910, 388], [883, 388]], "type": "textline", "text": "70", "confidence_by_character": [0.9183272123336792, 0.920913577079773], "confidence_by_field": 0.9183272123336792, "original_text": "70"}, {"location": [[969, 334], [1006, 334], [1006, 359], [969, 359]], "type": "textline", "text": "4.8", "confidence_by_character": [0.9184820652008057, 0.9289669990539551, 0.9193996787071228], "confidence_by_field": 0.9184820652008057, "original_text": "4.8"}, {"location": [[970, 364], [1002, 364], [1002, 389], [970, 
389]], "type": "textline", "text": "74", "confidence_by_character": [0.9170815348625183, 0.9173583984375], "confidence_by_field": 0.9170815348625183, "original_text": "74"}, {"location": [[1059, 334], [1094, 334], [1094, 359], [1059, 359]], "type": "textline", "text": "6.6", "confidence_by_character": [0.9224190711975098, 0.9298495650291443, 0.9167705178260803], "confidence_by_field": 0.9167705178260803, "original_text": "6.6"}, {"location": [[1143, 303], [1190, 303], [1190, 327], [1143, 327]], "type": "textline", "text": "14.4", "confidence_by_character": [0.9120434522628784, 0.9212027788162231, 0.9268625974655151, 0.9202296137809753], "confidence_by_field": 0.9120434522628784, "original_text": "14.4"}, {"location": [[1061, 364], [1093, 364], [1093, 391], [1061, 391]], "type": "textline", "text": "70", "confidence_by_character": [0.9193744659423828, 0.9198716878890991], "confidence_by_field": 0.9193744659423828, "original_text": "70"}, {"location": [[1148, 334], [1185, 334], [1185, 359], [1148, 359]], "type": "textline", "text": "7.6", "confidence_by_character": [0.9237031936645508, 0.922902524471283, 0.9160147905349731], "confidence_by_field": 0.9160147905349731, "original_text": "7.6"}, {"location": [[1149, 364], [1182, 364], [1182, 389], [1149, 389]], "type": "textline", "text": "67", "confidence_by_character": [0.9192152619361877, 0.9226703643798828], "confidence_by_field": 0.9192152619361877, "original_text": "67"}, {"location": [[1238, 334], [1275, 334], [1275, 358], [1238, 358]], "type": "textline", "text": "4.8", "confidence_by_character": [0.9171125292778015, 0.9256289005279541, 0.9216449856758118], "confidence_by_field": 0.9171125292778015, "original_text": "4.8"}, {"location": [[1243, 365], [1270, 365], [1270, 388], [1243, 388]], "type": "textline", "text": "74", "confidence_by_character": [0.9173100590705872, 0.9199501872062683], "confidence_by_field": 0.9173100590705872, "original_text": "74"}, {"location": [[384, 429], [495, 429], [495, 448], [384, 
448]], "type": "textline", "text": "\u786c\u5316\u4fc3\u9032\u52641", "confidence_by_character": [0.9833092093467712, 0.9183294773101807, 0.8089151382446289, 0.9589380621910095, 0.9683259725570679, 0.9022980332374573], "confidence_by_field": 0.8089151382446289, "original_text": "\u786c\u5316\u4fc3\u9032\u52641"}, {"location": [[236, 520], [295, 520], [295, 542], [236, 542]], "type": "textline", "text": "\u7740\u8272\u5264", "confidence_by_character": [0.9404123425483704, 0.9459457397460938, 0.9636843204498291], "confidence_by_field": 0.9404123425483704, "original_text": "\u7740\u8272\u5264"}, {"location": [[236, 489], [295, 489], [295, 512], [236, 512]], "type": "textline", "text": "\u96e2\u578b\u5264", "confidence_by_character": [0.9581624865531921, 0.945020318031311, 0.9589981436729431], "confidence_by_field": 0.945020318031311, "original_text": "\u96e2\u578b\u5264"}, {"location": [[237, 459], [371, 459], [371, 480], [237, 480]], "type": "textline", "text": "\u30ab\u30c3\u30d7\u30ea\u30f3\u30b0\u5264", "confidence_by_character": [0.9071572422981262, 0.9223423600196838, 0.9463334083557129, 0.9168652892112732, 0.921983003616333, 0.9373961091041565, 0.9660329818725586], "confidence_by_field": 0.9071572422981262, "original_text": "\u30ab\u30c3\u30d7\u30ea\u30f3\u30b0\u5264"}, {"location": [[237, 429], [333, 429], [333, 448], [237, 448]], "type": "textline", "text": "\u786c\u5316\u4fc3\u9032\u5264", "confidence_by_character": [0.9770804643630981, 0.9250267744064331, 0.831341564655304, 0.9542734026908875, 0.9714828729629517], "confidence_by_field": 0.831341564655304, "original_text": "\u786c\u5316\u4fc3\u9032\u5264"}, {"location": [[384, 398], [491, 398], [491, 417], [384, 417]], "type": "textline", "text": "\u7121\u6a5f\u5145\u586b\u52642", "confidence_by_character": [0.9506935477256775, 0.931281566619873, 0.9721326231956482, 0.9734131693840027, 0.9598016738891602, 0.915614664554596], "confidence_by_field": 0.915614664554596, "original_text": 
"\u7121\u6a5f\u5145\u586b\u52642"}, {"location": [[592, 410], [652, 410], [652, 434], [592, 434]], "type": "textline", "text": "\u8cea\u91cf%", "confidence_by_character": [0.9058261513710022, 0.9396486282348633, 0.9175691604614258], "confidence_by_field": 0.9058261513710022, "original_text": "\u8cea\u91cf%"}, {"location": [[883, 397], [910, 397], [910, 419], [883, 419]], "type": "textline", "text": "10", "confidence_by_character": [0.9157432913780212, 0.9219378232955933], "confidence_by_field": 0.9157432913780212, "original_text": "10"}, {"location": [[970, 396], [1002, 396], [1002, 421], [970, 421]], "type": "textline", "text": "10", "confidence_by_character": [0.9164617657661438, 0.9216349720954895], "confidence_by_field": 0.9164617657661438, "original_text": "10"}, {"location": [[1061, 396], [1093, 396], [1093, 421], [1061, 421]], "type": "textline", "text": "10", "confidence_by_character": [0.9159533977508545, 0.9209455251693726], "confidence_by_field": 0.9159533977508545, "original_text": "10"}, {"location": [[1151, 396], [1183, 396], [1183, 421], [1151, 421]], "type": "textline", "text": "10", "confidence_by_character": [0.9155728816986084, 0.9205871224403381], "confidence_by_field": 0.9155728816986084, "original_text": "10"}, {"location": [[1243, 397], [1270, 397], [1270, 419], [1243, 419]], "type": "textline", "text": "10", "confidence_by_character": [0.9161996841430664, 0.9221776127815247], "confidence_by_field": 0.9161996841430664, "original_text": "10"}, {"location": [[385, 459], [532, 459], [532, 479], [385, 479]], "type": "textline", "text": "\u30ab\u30c3\u30d7\u30ea\u30f3\u30b0\u52641", "confidence_by_character": [0.906684398651123, 0.9242702126502991, 0.9463043808937073, 0.9182814955711365, 0.9192276000976562, 0.9397355914115906, 0.958014190196991, 0.9101974368095398], "confidence_by_field": 0.906684398651123, "original_text": "\u30ab\u30c3\u30d7\u30ea\u30f3\u30b0\u52641"}, {"location": [[384, 520], [457, 520], [457, 542], [384, 542]], "type": 
"textline", "text": "\u7740\u8272\u52641", "confidence_by_character": [0.9345429539680481, 0.9415675401687622, 0.9616525769233704, 0.9126831889152527], "confidence_by_field": 0.9126831889152527, "original_text": "\u7740\u8272\u52641"}, {"location": [[384, 491], [457, 491], [457, 510], [384, 510]], "type": "textline", "text": "\u96e2\u578b\u52641", "confidence_by_character": [0.938474178314209, 0.9532917737960815, 0.9655312895774841, 0.9134681224822998], "confidence_by_field": 0.9134681224822998, "original_text": "\u96e2\u578b\u52641"}, {"location": [[697, 458], [734, 458], [734, 483], [697, 483]], "type": "textline", "text": "0.2", "confidence_by_character": [0.918220579624176, 0.9233868718147278, 0.9168131947517395], "confidence_by_field": 0.9168131947517395, "original_text": "0.2"}, {"location": [[697, 427], [734, 427], [734, 451], [697, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9187952280044556, 0.9251962304115295, 0.9168661236763], "confidence_by_field": 0.9168661236763, "original_text": "0.2"}, {"location": [[878, 427], [915, 427], [915, 451], [878, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9180716276168823, 0.9220828413963318, 0.9168251156806946], "confidence_by_field": 0.9168251156806946, "original_text": "0.2"}, {"location": [[697, 520], [734, 520], [734, 543], [697, 543]], "type": "textline", "text": "0.4", "confidence_by_character": [0.9189480543136597, 0.9261084794998169, 0.9156019687652588], "confidence_by_field": 0.9156019687652588, "original_text": "0.4"}, {"location": [[699, 488], [734, 488], [734, 513], [699, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9182897210121155, 0.9248169660568237, 0.9167609214782715], "confidence_by_field": 0.9167609214782715, "original_text": "0.2"}, {"location": [[788, 456], [825, 456], [825, 481], [788, 481]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9177680015563965, 0.9243178963661194, 0.9165362119674683], 
"confidence_by_field": 0.9165362119674683, "original_text": "0.2"}, {"location": [[788, 427], [825, 427], [825, 451], [788, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9185940027236938, 0.922125518321991, 0.9162130951881409], "confidence_by_field": 0.9162130951881409, "original_text": "0.2"}, {"location": [[793, 397], [820, 397], [820, 419], [793, 419]], "type": "textline", "text": "10", "confidence_by_character": [0.9177662134170532, 0.9242537021636963], "confidence_by_field": 0.9177662134170532, "original_text": "10"}, {"location": [[789, 488], [825, 488], [825, 513], [789, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9175012111663818, 0.9248098731040955, 0.9154093265533447], "confidence_by_field": 0.9154093265533447, "original_text": "0.2"}, {"location": [[878, 458], [914, 458], [914, 483], [878, 483]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9184542298316956, 0.9236595034599304, 0.9165345430374146], "confidence_by_field": 0.9165345430374146, "original_text": "0.2"}, {"location": [[969, 456], [1006, 456], [1006, 481], [969, 481]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9164131879806519, 0.9254051446914673, 0.9154728651046753], "confidence_by_field": 0.9154728651046753, "original_text": "0.2"}, {"location": [[969, 426], [1006, 426], [1006, 451], [969, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9165401458740234, 0.9244502782821655, 0.9159473776817322], "confidence_by_field": 0.9159473776817322, "original_text": "0.2"}, {"location": [[1057, 456], [1094, 456], [1094, 481], [1057, 481]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9178974628448486, 0.9235621690750122, 0.9160559177398682], "confidence_by_field": 0.9160559177398682, "original_text": "0.2"}, {"location": [[1057, 426], [1094, 426], [1094, 451], [1057, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9180322885513306, 
0.921731173992157, 0.9166355133056641], "confidence_by_field": 0.9166355133056641, "original_text": "0.2"}, {"location": [[1148, 456], [1185, 456], [1185, 483], [1148, 483]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9164595603942871, 0.9242724180221558, 0.9157667756080627], "confidence_by_field": 0.9157667756080627, "original_text": "0.2"}, {"location": [[1148, 426], [1185, 426], [1185, 451], [1148, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9180106520652771, 0.9206836223602295, 0.9163994789123535], "confidence_by_field": 0.9163994789123535, "original_text": "0.2"}, {"location": [[1238, 456], [1274, 456], [1274, 481], [1238, 481]], "type": "textline", "text": "0.2", "confidence_by_character": [0.917959451675415, 0.9236196279525757, 0.916109561920166], "confidence_by_field": 0.916109561920166, "original_text": "0.2"}, {"location": [[1238, 426], [1275, 426], [1275, 451], [1238, 451]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9176772832870483, 0.9209798574447632, 0.9162566065788269], "confidence_by_field": 0.9162566065788269, "original_text": "0.2"}, {"location": [[791, 521], [825, 521], [825, 543], [791, 543]], "type": "textline", "text": "0.4", "confidence_by_character": [0.9188475012779236, 0.9243553280830383, 0.9138782024383545], "confidence_by_field": 0.9138782024383545, "original_text": "0.4"}, {"location": [[878, 518], [914, 518], [914, 543], [878, 543]], "type": "textline", "text": "0.4", "confidence_by_character": [0.9186568856239319, 0.924356997013092, 0.9140645265579224], "confidence_by_field": 0.9140645265579224, "original_text": "0.4"}, {"location": [[878, 488], [914, 488], [914, 513], [878, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9191107749938965, 0.9236589670181274, 0.9163731336593628], "confidence_by_field": 0.9163731336593628, "original_text": "0.2"}, {"location": [[969, 518], [1006, 518], [1006, 543], [969, 543]], "type": "textline", "text": 
"0.4", "confidence_by_character": [0.9177034497261047, 0.9257544279098511, 0.911573052406311], "confidence_by_field": 0.911573052406311, "original_text": "0.4"}, {"location": [[969, 488], [1006, 488], [1006, 513], [969, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9173431992530823, 0.9257070422172546, 0.9158886075019836], "confidence_by_field": 0.9158886075019836, "original_text": "0.2"}, {"location": [[1057, 488], [1094, 488], [1094, 513], [1057, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9179250001907349, 0.9226782917976379, 0.9163774251937866], "confidence_by_field": 0.9163774251937866, "original_text": "0.2"}, {"location": [[1059, 518], [1094, 518], [1094, 543], [1059, 543]], "type": "textline", "text": "0.4", "confidence_by_character": [0.9183651208877563, 0.9259626865386963, 0.9120912551879883], "confidence_by_field": 0.9120912551879883, "original_text": "0.4"}, {"location": [[1148, 488], [1185, 488], [1185, 513], [1148, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9188930988311768, 0.922532856464386, 0.91632080078125], "confidence_by_field": 0.91632080078125, "original_text": "0.2"}, {"location": [[1149, 518], [1185, 518], [1185, 543], [1149, 543]], "type": "textline", "text": "0.4", "confidence_by_character": [0.9181385040283203, 0.9250556230545044, 0.9124614000320435], "confidence_by_field": 0.9124614000320435, "original_text": "0.4"}, {"location": [[1238, 518], [1274, 518], [1274, 543], [1238, 543]], "type": "textline", "text": "0.4", "confidence_by_character": [0.9189369678497314, 0.924277663230896, 0.9133647680282593], "confidence_by_field": 0.9133647680282593, "original_text": "0.4"}, {"location": [[1238, 488], [1274, 488], [1274, 513], [1238, 513]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9190401434898376, 0.9235551357269287, 0.916479766368866], "confidence_by_field": 0.916479766368866, "original_text": "0.2"}, {"location": [[383, 551], [423, 
551], [423, 571], [383, 571]], "type": "textline", "text": "\u5408\u8a08", "confidence_by_character": [0.9172796607017517, 0.9246873259544373], "confidence_by_field": 0.9172796607017517, "original_text": "\u5408\u8a08"}, {"location": [[696, 550], [736, 550], [736, 574], [696, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.9186745285987854, 0.9217865467071533, 0.9134313464164734], "confidence_by_field": 0.9134313464164734, "original_text": "100"}, {"location": [[786, 548], [826, 548], [826, 574], [786, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.9187353849411011, 0.9226377010345459, 0.9132767915725708], "confidence_by_field": 0.9132767915725708, "original_text": "100"}, {"location": [[877, 548], [917, 548], [917, 574], [877, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.918876051902771, 0.9223041534423828, 0.9158591628074646], "confidence_by_field": 0.9158591628074646, "original_text": "100"}, {"location": [[965, 548], [1007, 548], [1007, 574], [965, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.9187213182449341, 0.9187850952148438, 0.9130398035049438], "confidence_by_field": 0.9130398035049438, "original_text": "100"}, {"location": [[1056, 548], [1096, 548], [1096, 574], [1056, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.9191818237304688, 0.9226840734481812, 0.9121555089950562], "confidence_by_field": 0.9121555089950562, "original_text": "100"}, {"location": [[1146, 548], [1186, 548], [1186, 574], [1146, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.9189720749855042, 0.9227191805839539, 0.9106909036636353], "confidence_by_field": 0.9106909036636353, "original_text": "100"}, {"location": [[1237, 550], [1277, 550], [1277, 574], [1237, 574]], "type": "textline", "text": "100", "confidence_by_character": [0.9188445806503296, 0.9203974008560181, 0.9133830666542053], "confidence_by_field": 0.9133830666542053, 
"original_text": "100"}, {"location": [[797, 763], [828, 763], [828, 793], [797, 793]], "type": "textline", "text": "10", "confidence_by_character": [0.9181389808654785, 0.9251900911331177], "confidence_by_field": 0.9181389808654785, "original_text": "10"}, {"location": [[862, 736], [938, 736], [938, 758], [862, 758]], "type": "textline", "text": "\u5b9f\u65bd\u4f8b3", "confidence_by_character": [0.940096378326416, 0.9323399066925049, 0.7783450484275818, 0.8894999623298645], "confidence_by_field": 0.7783450484275818, "original_text": "\u5b9f\u65bd\u4f8b3"}, {"location": [[886, 763], [919, 763], [919, 791], [886, 791]], "type": "textline", "text": "10", "confidence_by_character": [0.915480375289917, 0.92236328125], "confidence_by_field": 0.915480375289917, "original_text": "10"}, {"location": [[1225, 736], [1299, 736], [1299, 758], [1225, 758]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b4", "confidence_by_character": [0.9564768671989441, 0.9437181949615479, 0.8032115697860718, 0.9161063432693481], "confidence_by_field": 0.8032115697860718, "original_text": "\u6bd4\u8f03\u4f8b4"}, {"location": [[243, 831], [358, 831], [358, 851], [243, 851]], "type": "textline", "text": "\u71b1\u786c\u5316\u6027\u6a39\u8102", "confidence_by_character": [0.9673244953155518, 0.9746560454368591, 0.9246954917907715, 0.9313607811927795, 0.9588099122047424, 0.9608355164527893], "confidence_by_field": 0.9246954917907715, "original_text": "\u71b1\u786c\u5316\u6027\u6a39\u8102"}, {"location": [[391, 864], [534, 864], [534, 883], [391, 883]], "type": "textline", "text": "\u30b7\u30a2\u30cd\u30fc\u30c8\u6a39\u81021", "confidence_by_character": [0.9139906764030457, 0.9277936220169067, 0.9299982190132141, 0.9196512699127197, 0.9228595495223999, 0.9653444886207581, 0.9586682319641113, 0.9014871120452881], "confidence_by_field": 0.9014871120452881, "original_text": "\u30b7\u30a2\u30cd\u30fc\u30c8\u6a39\u81021"}, {"location": [[391, 832], [515, 832], [515, 852], [391, 852]], "type": "textline", 
"text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81023", "confidence_by_character": [0.9210723042488098, 0.9120734930038452, 0.8719823360443115, 0.9054407477378845, 0.9484961628913879, 0.955137312412262, 0.9074426889419556], "confidence_by_field": 0.8719823360443115, "original_text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81023"}, {"location": [[391, 801], [515, 801], [515, 821], [391, 821]], "type": "textline", "text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81022", "confidence_by_character": [0.9195655584335327, 0.8951327800750732, 0.8749004602432251, 0.9026333689689636, 0.9528785347938538, 0.9567875266075134, 0.9132635593414307], "confidence_by_field": 0.8749004602432251, "original_text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81022"}, {"location": [[391, 769], [512, 769], [512, 789], [391, 789]], "type": "textline", "text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81021", "confidence_by_character": [0.9198406338691711, 0.9114395380020142, 0.8742417097091675, 0.9063976407051086, 0.946216344833374, 0.9661325216293335, 0.8822324275970459], "confidence_by_field": 0.8742417097091675, "original_text": "\u30a8\u30dd\u30ad\u30b7\u6a39\u81021"}, {"location": [[605, 734], [649, 734], [649, 760], [605, 760]], "type": "textline", "text": "\u5358\u4f4d", "confidence_by_character": [0.9275341629981995, 0.9182473421096802], "confidence_by_field": 0.9182473421096802, "original_text": "\u5358\u4f4d"}, {"location": [[681, 736], [759, 736], [759, 760], [681, 760]], "type": "textline", "text": "\u5b9f\u65bd\u4f8b1", "confidence_by_character": [0.9462378621101379, 0.9295192360877991, 0.7775577306747437, 0.7519926428794861], "confidence_by_field": 0.7519926428794861, "original_text": "\u5b9f\u65bd\u4f8b1"}, {"location": [[772, 736], [847, 736], [847, 760], [772, 760]], "type": "textline", "text": "\u5b9f\u65bd\u4f8b2", "confidence_by_character": [0.945273756980896, 0.9234545826911926, 0.7456976771354675, 0.8920645117759705], "confidence_by_field": 0.7456976771354675, "original_text": "\u5b9f\u65bd\u4f8b2"}, 
{"location": [[956, 736], [1027, 736], [1027, 758], [956, 758]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b1", "confidence_by_character": [0.9605406522750854, 0.9484114050865173, 0.8022181391716003, 0.9111186861991882], "confidence_by_field": 0.8022181391716003, "original_text": "\u6bd4\u8f03\u4f8b1"}, {"location": [[1046, 736], [1119, 736], [1119, 758], [1046, 758]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b2", "confidence_by_character": [0.939124345779419, 0.9446831941604614, 0.7813613414764404, 0.9183083176612854], "confidence_by_field": 0.7813613414764404, "original_text": "\u6bd4\u8f03\u4f8b2"}, {"location": [[1136, 736], [1209, 736], [1209, 758], [1136, 758]], "type": "textline", "text": "\u6bd4\u8f03\u4f8b3", "confidence_by_character": [0.9410742521286011, 0.9363664984703064, 0.8050448894500732, 0.912356972694397], "confidence_by_field": 0.8050448894500732, "original_text": "\u6bd4\u8f03\u4f8b3"}, {"location": [[710, 799], [731, 799], [731, 823], [710, 823]], "type": "textline", "text": "5", "confidence_by_character": [0.9124702215194702], "confidence_by_field": 0.9124702215194702, "original_text": "5"}, {"location": [[1161, 798], [1183, 798], [1183, 823], [1161, 823]], "type": "textline", "text": "5", "confidence_by_character": [0.9127680659294128], "confidence_by_field": 0.9127680659294128, "original_text": "5"}, {"location": [[612, 874], [641, 874], [641, 901], [612, 901]], "type": "textline", "text": "%", "confidence_by_character": [0.9244439005851746], "confidence_by_field": 0.9244439005851746, "original_text": "%"}, {"location": [[705, 861], [738, 861], [738, 887], [705, 887]], "type": "textline", "text": "10", "confidence_by_character": [0.9159777164459229, 0.9210568070411682], "confidence_by_field": 0.9159777164459229, "original_text": "10"}, {"location": [[710, 829], [731, 829], [731, 853], [710, 853]], "type": "textline", "text": "5", "confidence_by_character": [0.9128246903419495], "confidence_by_field": 0.9128246903419495, 
"original_text": "5"}, {"location": [[710, 895], [731, 895], [731, 917], [710, 917]], "type": "textline", "text": "5", "confidence_by_character": [0.913021445274353], "confidence_by_field": 0.913021445274353, "original_text": "5"}, {"location": [[797, 863], [825, 863], [825, 885], [797, 885]], "type": "textline", "text": "10", "confidence_by_character": [0.9169679284095764, 0.9230868220329285], "confidence_by_field": 0.9169679284095764, "original_text": "10"}, {"location": [[804, 834], [820, 834], [820, 852], [804, 852]], "type": "textline", "text": "-", "confidence_by_character": [0.9069211483001709], "confidence_by_field": 0.9069211483001709, "original_text": "-"}, {"location": [[804, 804], [820, 804], [820, 822], [804, 822]], "type": "textline", "text": "-", "confidence_by_character": [0.9056602120399475], "confidence_by_field": 0.9056602120399475, "original_text": "-"}, {"location": [[894, 834], [909, 834], [909, 850], [894, 850]], "type": "textline", "text": "-", "confidence_by_character": [0.9042128920555115], "confidence_by_field": 0.9042128920555115, "original_text": "-"}, {"location": [[894, 806], [909, 806], [909, 822], [894, 822]], "type": "textline", "text": "-", "confidence_by_character": [0.9011750817298889], "confidence_by_field": 0.9011750817298889, "original_text": "-"}, {"location": [[1251, 798], [1272, 798], [1272, 823], [1251, 823]], "type": "textline", "text": "5", "confidence_by_character": [0.9124239683151245], "confidence_by_field": 0.9124239683151245, "original_text": "5"}, {"location": [[801, 895], [822, 895], [822, 917], [801, 917]], "type": "textline", "text": "5", "confidence_by_character": [0.9138578176498413], "confidence_by_field": 0.9138578176498413, "original_text": "5"}, {"location": [[888, 863], [915, 863], [915, 885], [888, 885]], "type": "textline", "text": "10", "confidence_by_character": [0.916037380695343, 0.9229444265365601], "confidence_by_field": 0.916037380695343, "original_text": "10"}, {"location": [[1161, 829], [1183, 
829], [1183, 853], [1161, 853]], "type": "textline", "text": "5", "confidence_by_character": [0.913336455821991], "confidence_by_field": 0.913336455821991, "original_text": "5"}, {"location": [[891, 893], [912, 893], [912, 917], [891, 917]], "type": "textline", "text": "5", "confidence_by_character": [0.9128530025482178], "confidence_by_field": 0.9128530025482178, "original_text": "5"}, {"location": [[1156, 860], [1190, 860], [1190, 887], [1156, 887]], "type": "textline", "text": "10", "confidence_by_character": [0.9161958694458008, 0.922942042350769], "confidence_by_field": 0.9161958694458008, "original_text": "10"}, {"location": [[1157, 889], [1185, 889], [1185, 920], [1157, 920]], "type": "textline", "text": "5", "confidence_by_character": [0.9128023386001587], "confidence_by_field": 0.9128023386001587, "original_text": "5"}, {"location": [[1246, 860], [1278, 860], [1278, 887], [1246, 887]], "type": "textline", "text": "10", "confidence_by_character": [0.9154711961746216, 0.9222509860992432], "confidence_by_field": 0.9154711961746216, "original_text": "10"}, {"location": [[1249, 829], [1272, 829], [1272, 853], [1249, 853]], "type": "textline", "text": "5", "confidence_by_character": [0.9134175777435303], "confidence_by_field": 0.9134175777435303, "original_text": "5"}, {"location": [[1251, 893], [1272, 893], [1272, 918], [1251, 918]], "type": "textline", "text": "5", "confidence_by_character": [0.912886381149292], "confidence_by_field": 0.912886381149292, "original_text": "5"}, {"location": [[242, 958], [338, 958], [338, 978], [242, 978]], "type": "textline", "text": "\u7121\u6a5f\u5145\u586b\u6750", "confidence_by_character": [0.9516746997833252, 0.9333645105361938, 0.9753089547157288, 0.9772308468818665, 0.961030125617981], "confidence_by_field": 0.9333645105361938, "original_text": "\u7121\u6a5f\u5145\u586b\u6750"}, {"location": [[242, 927], [320, 927], [320, 946], [242, 946]], "type": "textline", "text": "\u786c\u5316\u89e6\u5a92", "confidence_by_character": 
[0.9817054271697998, 0.9334205985069275, 0.9472013115882874, 0.9651457667350769], "confidence_by_field": 0.9334205985069275, "original_text": "\u786c\u5316\u89e6\u5a92"}, {"location": [[392, 895], [571, 895], [571, 915], [392, 915]], "type": "textline", "text": "\u30d5\u30a7\u30ce\u30fc\u30eb\u7cfb\u786c\u5316\u52641", "confidence_by_character": [0.8958667516708374, 0.8963339328765869, 0.918157160282135, 0.9283046126365662, 0.9119880199432373, 0.9494227766990662, 0.9859360456466675, 0.9373766779899597, 0.948577344417572, 0.9160327911376953], "confidence_by_field": 0.8958667516708374, "original_text": "\u30d5\u30a7\u30ce\u30fc\u30eb\u7cfb\u786c\u5316\u52641"}, {"location": [[390, 958], [496, 958], [496, 978], [390, 978]], "type": "textline", "text": "\u7121\u6a5f\u5145\u586b\u67501", "confidence_by_character": [0.955777108669281, 0.9075387120246887, 0.9792174100875854, 0.9780659079551697, 0.9546536803245544, 0.9078008532524109], "confidence_by_field": 0.9075387120246887, "original_text": "\u7121\u6a5f\u5145\u586b\u67501"}, {"location": [[390, 927], [477, 927], [477, 946], [390, 946]], "type": "textline", "text": "\u786c\u5316\u89e6\u5a921", "confidence_by_character": [0.9808465838432312, 0.9380431771278381, 0.9594202041625977, 0.9618983864784241, 0.9127021431922913], "confidence_by_field": 0.9127021431922913, "original_text": "\u786c\u5316\u89e6\u5a921"}, {"location": [[705, 926], [739, 926], [739, 949], [705, 949]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9183443188667297, 0.9227449893951416, 0.9166637063026428], "confidence_by_field": 0.9166637063026428, "original_text": "0.2"}, {"location": [[959, 939], [1025, 939], [1025, 963], [959, 963]], "type": "textline", "text": "(\u30bb\u30e9\u30df\u30c3\u30af)", "confidence_by_character": [0.916853666305542, 0.9260740876197815, 0.9092920422554016, 0.9257704615592957, 0.9260854721069336, 0.9179700613021851, 0.9244228005409241], "confidence_by_field": 0.9092920422554016, "original_text": 
"(\u30bb\u30e9\u30df\u30c3\u30af)"}, {"location": [[1049, 939], [1115, 939], [1115, 963], [1049, 963]], "type": "textline", "text": "(\u30bb\u30e9\u30df\u30c3\u30af)", "confidence_by_character": [0.9169306755065918, 0.9260130524635315, 0.9072588682174683, 0.9243903160095215, 0.925151526927948, 0.9165183901786804, 0.9235830307006836], "confidence_by_field": 0.9072588682174683, "original_text": "(\u30bb\u30e9\u30df\u30c3\u30af)"}, {"location": [[273, 1052], [489, 1052], [489, 1071], [273, 1071]], "type": "textline", "text": "L125D-L125U", "confidence_by_character": [0.9245724678039551, 0.9132282733917236, 0.906276524066925, 0.9224117398262024, 0.9258933067321777, 0.9184830784797668, 0.9157562255859375, 0.9087009429931641, 0.912157416343689, 0.9245545268058777, 0.8954219222068787], "confidence_by_field": 0.8954219222068787, "original_text": "L125D-L125U"}, {"location": [[277, 1020], [458, 1020], [458, 1039], [277, 1039]], "type": "textline", "text": "IL1TD-L1TU", "confidence_by_character": [0.8654385209083557, 0.9101585149765015, 0.9015868306159973, 0.8901948928833008, 0.9379502534866333, 0.9125338792800903, 0.9176797866821289, 0.8999291658401489, 0.9107672572135925, 0.9229012131690979], "confidence_by_field": 0.8654385209083557, "original_text": "IL1TD-L1TU"}, {"location": [[386, 987], [431, 987], [431, 1011], [386, 1011]], "type": "textline", "text": "\u5408\u8a08", "confidence_by_character": [0.9146950840950012, 0.9339513182640076], "confidence_by_field": 0.9146950840950012, "original_text": "\u5408\u8a08"}, {"location": [[701, 958], [743, 958], [743, 977], [701, 977]], "type": "textline", "text": "74.8", "confidence_by_character": [0.9160793423652649, 0.9184203147888184, 0.9271222949028015, 0.9212100505828857], "confidence_by_field": 0.9160793423652649, "original_text": "74.8"}, {"location": [[792, 958], [831, 958], [831, 977], [792, 977]], "type": "textline", "text": "74.8", "confidence_by_character": [0.9163533449172974, 0.9201879501342773, 0.9259200692176819, 
0.9224289059638977], "confidence_by_field": 0.9163533449172974, "original_text": "74.8"}, {"location": [[796, 926], [826, 926], [826, 945], [796, 945]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9181907176971436, 0.9227388501167297, 0.9164943099021912], "confidence_by_field": 0.9164943099021912, "original_text": "0.2"}, {"location": [[886, 926], [917, 926], [917, 945], [886, 945]], "type": "textline", "text": "0.2", "confidence_by_character": [0.917766809463501, 0.9248180985450745, 0.916694164276123], "confidence_by_field": 0.916694164276123, "original_text": "0.2"}, {"location": [[1153, 923], [1190, 923], [1190, 949], [1153, 949]], "type": "textline", "text": "0.2", "confidence_by_character": [0.9172568917274475, 0.923073947429657, 0.9173568487167358], "confidence_by_field": 0.9172568917274475, "original_text": "0.2"}, {"location": [[1243, 923], [1280, 923], [1280, 949], [1243, 949]], "type": "textline", "text": "0.2", "confidence_by_character": [0.918009340763092, 0.9230136871337891, 0.9172936081886292], "confidence_by_field": 0.9172936081886292, "original_text": "0.2"}, {"location": [[603, 1021], [649, 1021], [649, 1044], [603, 1044]], "type": "textline", "text": "ppm", "confidence_by_character": [0.93487948179245, 0.9136003255844116, 0.9295913577079773], "confidence_by_field": 0.9136003255844116, "original_text": "ppm"}, {"location": [[702, 1019], [741, 1019], [741, 1042], [702, 1042]], "type": "textline", "text": "510", "confidence_by_character": [0.9160666465759277, 0.9195601344108582, 0.9182965755462646], "confidence_by_field": 0.9160666465759277, "original_text": "510"}, {"location": [[704, 990], [739, 990], [739, 1009], [704, 1009]], "type": "textline", "text": "100", "confidence_by_character": [0.9155290126800537, 0.9208177924156189, 0.9127534031867981], "confidence_by_field": 0.9127534031867981, "original_text": "100"}, {"location": [[794, 990], [830, 990], [830, 1009], [794, 1009]], "type": "textline", "text": "100", 
"confidence_by_character": [0.9162766337394714, 0.9207051396369934, 0.9104440212249756], "confidence_by_field": 0.9104440212249756, "original_text": "100"}, {"location": [[881, 958], [922, 958], [922, 977], [881, 977]], "type": "textline", "text": "74.8", "confidence_by_character": [0.9179209470748901, 0.919108510017395, 0.9271717667579651, 0.9218475222587585], "confidence_by_field": 0.9179209470748901, "original_text": "74.8"}, {"location": [[1149, 957], [1195, 957], [1195, 980], [1149, 980]], "type": "textline", "text": "74.8", "confidence_by_character": [0.91691654920578, 0.9166283011436462, 0.9277328848838806, 0.9221088886260986], "confidence_by_field": 0.9166283011436462, "original_text": "74.8"}, {"location": [[881, 1019], [920, 1019], [920, 1042], [881, 1042]], "type": "textline", "text": "470", "confidence_by_character": [0.9185625314712524, 0.9136890769004822, 0.9183825254440308], "confidence_by_field": 0.9136890769004822, "original_text": "470"}, {"location": [[883, 990], [920, 990], [920, 1009], [883, 1009]], "type": "textline", "text": "100", "confidence_by_character": [0.9160255789756775, 0.9194254279136658, 0.9109370708465576], "confidence_by_field": 0.9109370708465576, "original_text": "100"}, {"location": [[1151, 987], [1193, 987], [1193, 1012], [1151, 1012]], "type": "textline", "text": "100", "confidence_by_character": [0.9162431955337524, 0.9209614992141724, 0.9097722768783569], "confidence_by_field": 0.9097722768783569, "original_text": "100"}, {"location": [[1240, 957], [1285, 957], [1285, 980], [1240, 980]], "type": "textline", "text": "74.8", "confidence_by_character": [0.9182584881782532, 0.92033451795578, 0.9307538270950317, 0.9210019111633301], "confidence_by_field": 0.9182584881782532, "original_text": "74.8"}, {"location": [[1241, 1019], [1282, 1019], [1282, 1042], [1241, 1042]], "type": "textline", "text": "510", "confidence_by_character": [0.914992094039917, 0.9192731976509094, 0.9183305501937866], "confidence_by_field": 
0.914992094039917, "original_text": "510"}, {"location": [[1241, 988], [1282, 988], [1282, 1012], [1241, 1012]], "type": "textline", "text": "100", "confidence_by_character": [0.9163695573806763, 0.9216129779815674, 0.9101614952087402], "confidence_by_field": 0.9101614952087402, "original_text": "100"}, {"location": [[278, 1082], [413, 1082], [413, 1103], [278, 1103]], "type": "textline", "text": "\u30ac\u30e9\u30b9\u8ee2\u79fb\u6e29\u5ea6", "confidence_by_character": [0.9343377947807312, 0.920525074005127, 0.9253968000411987, 0.9179226756095886, 0.9002148509025574, 0.9482362270355225, 0.9286996722221375], "confidence_by_field": 0.9002148509025574, "original_text": "\u30ac\u30e9\u30b9\u8ee2\u79fb\u6e29\u5ea6"}, {"location": [[420, 1082], [458, 1082], [458, 1104], [420, 1104]], "type": "textline", "text": "(Tg)", "confidence_by_character": [0.9211322665214539, 0.8978011012077332, 0.9115868210792542, 0.9274181723594666], "confidence_by_field": 0.8978011012077332, "original_text": "(Tg)"}, {"location": [[605, 1051], [650, 1051], [650, 1075], [605, 1075]], "type": "textline", "text": "ppm", "confidence_by_character": [0.9387204647064209, 0.9056726098060608, 0.9325636029243469], "confidence_by_field": 0.9056726098060608, "original_text": "ppm"}, {"location": [[617, 1082], [641, 1082], [641, 1104], [617, 1104]], "type": "textline", "text": "\u00b0C", "confidence_by_character": [0.9452391266822815, 0.9272077679634094], "confidence_by_field": 0.9272077679634094, "original_text": "\u00b0C"}, {"location": [[704, 1052], [739, 1052], [739, 1071], [704, 1071]], "type": "textline", "text": "390", "confidence_by_character": [0.9161151647567749, 0.9135255217552185, 0.9237526655197144], "confidence_by_field": 0.9135255217552185, "original_text": "390"}, {"location": [[793, 1084], [828, 1084], [828, 1103], [793, 1103]], "type": "textline", "text": "224", "confidence_by_character": [0.9199969172477722, 0.9111586213111877, 0.9230489730834961], "confidence_by_field": 
0.9111586213111877, "original_text": "224"}, {"location": [[793, 1052], [830, 1052], [830, 1071], [793, 1071]], "type": "textline", "text": "330", "confidence_by_character": [0.9186529517173767, 0.913848340511322, 0.9239091277122498], "confidence_by_field": 0.913848340511322, "original_text": "330"}, {"location": [[793, 1019], [831, 1019], [831, 1042], [793, 1042]], "type": "textline", "text": "470", "confidence_by_character": [0.9227962493896484, 0.9154515862464905, 0.9186294078826904], "confidence_by_field": 0.9154515862464905, "original_text": "470"}, {"location": [[1151, 1019], [1191, 1019], [1191, 1044], [1151, 1044]], "type": "textline", "text": "510", "confidence_by_character": [0.919104814529419, 0.9196509122848511, 0.9189945459365845], "confidence_by_field": 0.9189945459365845, "original_text": "510"}, {"location": [[881, 1081], [920, 1081], [920, 1104], [881, 1104]], "type": "textline", "text": "224", "confidence_by_character": [0.9203312993049622, 0.9107922315597534, 0.9243770241737366], "confidence_by_field": 0.9107922315597534, "original_text": "224"}, {"location": [[881, 1050], [920, 1050], [920, 1074], [881, 1074]], "type": "textline", "text": "330", "confidence_by_character": [0.9171018600463867, 0.9141280055046082, 0.9222427010536194], "confidence_by_field": 0.9141280055046082, "original_text": "330"}, {"location": [[1151, 1084], [1191, 1084], [1191, 1108], [1151, 1108]], "type": "textline", "text": "235", "confidence_by_character": [0.9159690141677856, 0.9248628616333008, 0.9205185174942017], "confidence_by_field": 0.9159690141677856, "original_text": "235"}, {"location": [[1151, 1050], [1191, 1050], [1191, 1074], [1151, 1074]], "type": "textline", "text": "390", "confidence_by_character": [0.9146880507469177, 0.911052405834198, 0.9227294325828552], "confidence_by_field": 0.911052405834198, "original_text": "390"}, {"location": [[1241, 1084], [1280, 1084], [1280, 1108], [1241, 1108]], "type": "textline", "text": "235", "confidence_by_character": 
[0.9159569144248962, 0.9244524240493774, 0.920172393321991], "confidence_by_field": 0.9159569144248962, "original_text": "235"}, {"location": [[1241, 1050], [1282, 1050], [1282, 1074], [1241, 1074]], "type": "textline", "text": "390", "confidence_by_character": [0.9140211939811707, 0.9115845561027527, 0.9224275946617126], "confidence_by_field": 0.9115845561027527, "original_text": "390"}, {"location": [[184, 338], [205, 338], [205, 508], [184, 508]], "type": "textline", "text": "\u71b1\u786c\u5316\u6027\u6a39\u8102\u7d44\u6210\u7269", "confidence_by_character": [0.9959474205970764, 0.9997753500938416, 0.9999818801879883, 0.9999709129333496, 0.9999679327011108, 0.9999550580978394, 0.9999865293502808, 0.9992927312850952, 0.9999799728393555], "confidence_by_field": 0.9959474205970764, "original_text": "\u71b1\u786c\u5316\u6027\u6a39\u8102\u7d44\u6210\u7269"}, {"location": [[189, 804], [210, 804], [210, 976], [189, 976]], "type": "textline", "text": "\u71b1\u786c\u5316\u6027\u6a39\u8102\u7d44\u6210\u7269", "confidence_by_character": [0.9931052327156067, 0.9999232292175293, 0.9999855756759644, 0.9999867677688599, 0.9999822378158569, 0.9999606609344482, 0.9999895095825195, 0.9993935823440552, 0.9371008276939392], "confidence_by_field": 0.9371008276939392, "original_text": "\u71b1\u786c\u5316\u6027\u6a39\u8102\u7d44\u6210\u7269"}], "table": [{"location": [[1218, 1080], [1303, 1080], [1303, 1107], [1218, 1107]], "bbox": [1218, 1080, 1303, 1107], "points": [[1277, 1081], [1278, 1080], [1279, 1081], [1280, 1080], [1281, 1081], [1282, 1080], [1284, 1081], [1285, 1080], [1286, 1081], [1287, 1080], [1288, 1081], [1289, 1080], [1291, 1081], [1302, 1081], [1303, 1082], [1303, 1106], [1302, 1107], [1219, 1107], [1218, 1106], [1218, 1082], [1219, 1081]], "type": "cell", "rows": [11, 11], "columns": [10, 10], "text_list": []}, {"location": [[1128, 1080], [1213, 1080], [1213, 1107], [1128, 1107]], "bbox": [1128, 1080, 1213, 1107], "points": [[1129, 1081], [1130, 1080], [1132, 1081], 
[1212, 1081], [1213, 1082], [1213, 1106], [1212, 1107], [1129, 1107], [1128, 1106], [1129, 1105]], "type": "cell", "rows": [11, 11], "columns": [9, 9], "text_list": []}, {"location": [[858, 1080], [943, 1080], [943, 1107], [858, 1107]], "bbox": [858, 1080, 943, 1107], "points": [[877, 1081], [878, 1080], [879, 1081], [880, 1080], [881, 1081], [882, 1080], [884, 1081], [885, 1080], [887, 1080], [888, 1081], [889, 1080], [934, 1080], [935, 1081], [942, 1081], [943, 1082], [943, 1106], [942, 1107], [859, 1107], [858, 1106], [858, 1082], [859, 1081]], "type": "cell", "rows": [11, 11], "columns": [6, 6], "text_list": []}, {"location": [[769, 1080], [853, 1080], [853, 1107], [769, 1107]], "bbox": [769, 1080, 853, 1107], "points": [[792, 1081], [794, 1080], [795, 1081], [806, 1081], [808, 1080], [809, 1081], [811, 1081], [812, 1080], [815, 1080], [816, 1081], [817, 1080], [838, 1080], [839, 1081], [840, 1080], [842, 1081], [852, 1081], [853, 1082], [853, 1106], [852, 1107], [770, 1107], [769, 1106], [769, 1082], [770, 1081]], "type": "cell", "rows": [11, 11], "columns": [5, 5], "text_list": []}, {"location": [[678, 1080], [763, 1080], [763, 1107], [678, 1107]], "bbox": [678, 1080, 763, 1107], "points": [[704, 1081], [705, 1080], [706, 1081], [707, 1080], [708, 1081], [709, 1080], [711, 1081], [712, 1080], [713, 1081], [714, 1080], [715, 1081], [716, 1080], [718, 1081], [719, 1080], [720, 1081], [721, 1080], [722, 1081], [723, 1080], [725, 1081], [726, 1080], [727, 1081], [762, 1081], [763, 1082], [763, 1106], [762, 1107], [679, 1107], [678, 1106], [678, 1082], [679, 1081]], "type": "cell", "rows": [11, 11], "columns": [4, 4], "text_list": []}, {"location": [[577, 1080], [674, 1080], [674, 1107], [577, 1107]], "bbox": [577, 1080, 674, 1107], "points": [[622, 1081], [623, 1080], [625, 1080], [626, 1081], [628, 1081], [629, 1080], [635, 1080], [636, 1081], [672, 1081], [673, 1082], [673, 1104], [674, 1105], [674, 1106], [673, 1107], [578, 1107], [577, 1106], [577, 1082], 
[578, 1081]], "type": "cell", "rows": [11, 11], "columns": [3, 3], "text_list": []}, {"location": [[171, 1080], [574, 1080], [574, 1107], [171, 1107]], "bbox": [171, 1080, 574, 1107], "points": [[171, 1081], [173, 1080], [174, 1081], [198, 1081], [199, 1080], [201, 1081], [202, 1080], [221, 1080], [222, 1081], [223, 1080], [224, 1081], [225, 1080], [226, 1081], [285, 1081], [286, 1080], [288, 1080], [290, 1081], [292, 1081], [293, 1080], [297, 1080], [298, 1081], [299, 1081], [300, 1080], [312, 1080], [313, 1081], [318, 1081], [319, 1080], [320, 1081], [321, 1080], [370, 1080], [371, 1081], [374, 1081], [375, 1080], [390, 1080], [391, 1081], [394, 1081], [395, 1080], [408, 1080], [409, 1081], [410, 1080], [411, 1081], [418, 1081], [419, 1080], [421, 1081], [422, 1080], [423, 1081], [425, 1081], [426, 1080], [443, 1080], [444, 1081], [451, 1081], [452, 1080], [456, 1080], [457, 1081], [573, 1081], [574, 1082], [574, 1106], [573, 1107], [173, 1107], [171, 1106]], "type": "cell", "rows": [11, 11], "columns": [0, 2], "text_list": []}, {"location": [[1218, 1050], [1303, 1050], [1303, 1075], [1218, 1075]], "bbox": [1218, 1050, 1303, 1075], "points": [[1218, 1051], [1219, 1050], [1302, 1050], [1303, 1051], [1303, 1074], [1302, 1075], [1219, 1075], [1218, 1074]], "type": "cell", "rows": [10, 10], "columns": [10, 10], "text_list": []}, {"location": [[1129, 1050], [1213, 1050], [1213, 1075], [1129, 1075]], "bbox": [1129, 1050, 1213, 1075], "points": [[1129, 1051], [1130, 1050], [1212, 1050], [1213, 1051], [1213, 1074], [1212, 1075], [1130, 1075], [1129, 1074]], "type": "cell", "rows": [10, 10], "columns": [9, 9], "text_list": []}, {"location": [[858, 1050], [943, 1050], [943, 1075], [858, 1075]], "bbox": [858, 1050, 943, 1075], "points": [[858, 1051], [859, 1050], [942, 1050], [943, 1051], [943, 1074], [942, 1075], [859, 1075], [858, 1074]], "type": "cell", "rows": [10, 10], "columns": [6, 6], "text_list": []}, {"location": [[769, 1050], [853, 1050], [853, 1075], [769, 
1075]], "bbox": [769, 1050, 853, 1075], "points": [[769, 1051], [770, 1050], [852, 1050], [853, 1051], [853, 1074], [852, 1075], [770, 1075], [769, 1074]], "type": "cell", "rows": [10, 10], "columns": [5, 5], "text_list": []}, {"location": [[678, 1050], [763, 1050], [763, 1075], [678, 1075]], "bbox": [678, 1050, 763, 1075], "points": [[678, 1051], [679, 1050], [762, 1050], [763, 1051], [763, 1074], [762, 1075], [679, 1075], [678, 1074]], "type": "cell", "rows": [10, 10], "columns": [4, 4], "text_list": []}, {"location": [[577, 1050], [673, 1050], [673, 1075], [577, 1075]], "bbox": [577, 1050, 673, 1075], "points": [[577, 1051], [578, 1050], [672, 1050], [673, 1051], [673, 1074], [672, 1075], [580, 1075], [578, 1074], [578, 1061], [577, 1060], [577, 1054], [578, 1053], [578, 1052]], "type": "cell", "rows": [10, 10], "columns": [3, 3], "text_list": []}, {"location": [[171, 1050], [574, 1050], [574, 1075], [171, 1075]], "bbox": [171, 1050, 574, 1075], "points": [[171, 1051], [173, 1050], [573, 1050], [574, 1051], [574, 1074], [573, 1075], [173, 1075], [171, 1074]], "type": "cell", "rows": [10, 10], "columns": [0, 2], "text_list": []}, {"location": [[577, 1018], [673, 1018], [673, 1044], [577, 1044]], "bbox": [577, 1018, 673, 1044], "points": [[577, 1019], [578, 1018], [672, 1018], [673, 1019], [673, 1043], [672, 1044], [580, 1044], [578, 1043], [578, 1031], [577, 1030]], "type": "cell", "rows": [9, 9], "columns": [3, 3], "text_list": []}, {"location": [[1218, 1017], [1303, 1017], [1303, 1044], [1218, 1044]], "bbox": [1218, 1017, 1303, 1044], "points": [[1244, 1018], [1245, 1017], [1289, 1017], [1291, 1018], [1292, 1017], [1293, 1018], [1302, 1018], [1303, 1019], [1303, 1043], [1302, 1044], [1219, 1044], [1218, 1043], [1218, 1019], [1219, 1018]], "type": "cell", "rows": [9, 9], "columns": [10, 10], "text_list": []}, {"location": [[1129, 1017], [1213, 1017], [1213, 1044], [1129, 1044]], "bbox": [1129, 1017, 1213, 1044], "points": [[1153, 1018], [1154, 1017], [1155, 
1018], [1156, 1018], [1157, 1017], [1196, 1017], [1197, 1018], [1198, 1017], [1199, 1018], [1212, 1018], [1213, 1019], [1213, 1043], [1212, 1044], [1130, 1044], [1129, 1043], [1129, 1019], [1130, 1018]], "type": "cell", "rows": [9, 9], "columns": [9, 9], "text_list": []}, {"location": [[858, 1017], [943, 1017], [943, 1044], [858, 1044]], "bbox": [858, 1017, 943, 1044], "points": [[884, 1018], [885, 1017], [929, 1017], [930, 1018], [942, 1018], [943, 1019], [943, 1043], [942, 1044], [859, 1044], [858, 1043], [858, 1019], [859, 1018]], "type": "cell", "rows": [9, 9], "columns": [6, 6], "text_list": []}, {"location": [[769, 1017], [853, 1017], [853, 1044], [769, 1044]], "bbox": [769, 1017, 853, 1044], "points": [[795, 1018], [796, 1017], [836, 1017], [837, 1018], [838, 1017], [839, 1018], [852, 1018], [853, 1019], [853, 1043], [852, 1044], [770, 1044], [769, 1043], [769, 1019], [770, 1018]], "type": "cell", "rows": [9, 9], "columns": [5, 5], "text_list": []}, {"location": [[678, 1017], [763, 1017], [763, 1044], [678, 1044]], "bbox": [678, 1017, 763, 1044], "points": [[704, 1018], [705, 1017], [706, 1018], [707, 1017], [747, 1017], [748, 1018], [749, 1017], [750, 1018], [762, 1018], [763, 1019], [763, 1043], [762, 1044], [679, 1044], [678, 1043], [678, 1019], [679, 1018]], "type": "cell", "rows": [9, 9], "columns": [4, 4], "text_list": []}, {"location": [[171, 1017], [574, 1017], [574, 1044], [171, 1044]], "bbox": [171, 1017, 574, 1044], "points": [[273, 1018], [274, 1017], [276, 1018], [277, 1017], [366, 1017], [367, 1018], [383, 1018], [384, 1017], [396, 1017], [397, 1018], [398, 1017], [399, 1018], [401, 1017], [402, 1018], [403, 1017], [464, 1017], [465, 1018], [467, 1018], [468, 1017], [471, 1017], [472, 1018], [573, 1018], [574, 1019], [574, 1043], [573, 1044], [173, 1044], [171, 1043], [171, 1019], [173, 1018]], "type": "cell", "rows": [9, 9], "columns": [0, 2], "text_list": []}, {"location": [[1218, 987], [1303, 987], [1303, 1013], [1218, 1013]], "bbox": [1218, 
987, 1303, 1013], "points": [[1218, 988], [1219, 987], [1302, 987], [1303, 988], [1303, 1012], [1302, 1013], [1219, 1013], [1218, 1012]], "type": "cell", "rows": [8, 8], "columns": [10, 10], "text_list": []}, {"location": [[1129, 987], [1213, 987], [1213, 1013], [1129, 1013]], "bbox": [1129, 987, 1213, 1013], "points": [[1129, 988], [1130, 987], [1212, 987], [1213, 988], [1213, 1012], [1212, 1013], [1130, 1013], [1129, 1012]], "type": "cell", "rows": [8, 8], "columns": [9, 9], "text_list": []}, {"location": [[858, 987], [943, 987], [943, 1013], [858, 1013]], "bbox": [858, 987, 943, 1013], "points": [[858, 988], [859, 987], [942, 987], [943, 988], [943, 1012], [942, 1013], [859, 1013], [858, 1012]], "type": "cell", "rows": [8, 8], "columns": [6, 6], "text_list": []}, {"location": [[769, 987], [853, 987], [853, 1013], [769, 1013]], "bbox": [769, 987, 853, 1013], "points": [[769, 988], [770, 987], [852, 987], [853, 988], [853, 1012], [852, 1013], [770, 1013], [769, 1012]], "type": "cell", "rows": [8, 8], "columns": [5, 5], "text_list": []}, {"location": [[678, 987], [763, 987], [763, 1013], [678, 1013]], "bbox": [678, 987, 763, 1013], "points": [[678, 988], [679, 987], [762, 987], [763, 988], [763, 1012], [762, 1013], [679, 1013], [678, 1012]], "type": "cell", "rows": [8, 8], "columns": [4, 4], "text_list": []}, {"location": [[240, 987], [574, 987], [574, 1013], [240, 1013]], "bbox": [240, 987, 574, 1013], "points": [[240, 988], [242, 987], [573, 987], [574, 988], [574, 1012], [573, 1013], [242, 1013], [240, 1012]], "type": "cell", "rows": [8, 8], "columns": [1, 2], "text_list": []}, {"location": [[240, 955], [383, 955], [383, 983], [240, 983]], "bbox": [240, 955, 383, 983], "points": [[240, 956], [242, 955], [382, 955], [383, 956], [383, 982], [382, 983], [242, 983], [240, 982]], "type": "cell", "rows": [7, 7], "columns": [1, 1], "text_list": []}, {"location": [[1218, 954], [1303, 954], [1303, 983], [1218, 983]], "bbox": [1218, 954, 1303, 983], "points": [[1249, 
955], [1250, 954], [1264, 954], [1265, 955], [1302, 955], [1303, 956], [1303, 982], [1302, 983], [1219, 983], [1218, 982], [1219, 981], [1218, 980], [1218, 956], [1219, 955]], "type": "cell", "rows": [7, 7], "columns": [10, 10], "text_list": []}, {"location": [[1129, 954], [1213, 954], [1213, 983], [1129, 983]], "bbox": [1129, 954, 1213, 983], "points": [[1164, 955], [1165, 954], [1167, 955], [1168, 954], [1169, 955], [1170, 954], [1171, 955], [1173, 954], [1174, 955], [1212, 955], [1213, 956], [1213, 982], [1212, 983], [1130, 983], [1129, 982], [1129, 956], [1130, 955]], "type": "cell", "rows": [7, 7], "columns": [9, 9], "text_list": []}, {"location": [[858, 954], [943, 954], [943, 983], [858, 983]], "bbox": [858, 954, 943, 983], "points": [[888, 955], [889, 954], [904, 954], [905, 955], [942, 955], [943, 956], [943, 982], [942, 983], [859, 983], [858, 982], [858, 956], [859, 955]], "type": "cell", "rows": [7, 7], "columns": [6, 6], "text_list": []}, {"location": [[769, 954], [853, 954], [853, 983], [769, 983]], "bbox": [769, 954, 853, 983], "points": [[806, 955], [808, 954], [809, 955], [810, 954], [811, 955], [852, 955], [853, 956], [853, 982], [852, 983], [770, 983], [769, 982], [769, 956], [770, 955]], "type": "cell", "rows": [7, 7], "columns": [5, 5], "text_list": []}, {"location": [[678, 954], [763, 954], [763, 983], [678, 983]], "bbox": [678, 954, 763, 983], "points": [[713, 955], [714, 954], [715, 955], [762, 955], [763, 956], [763, 982], [762, 983], [679, 983], [678, 982], [678, 956], [679, 955]], "type": "cell", "rows": [7, 7], "columns": [4, 4], "text_list": []}, {"location": [[388, 954], [574, 954], [574, 983], [388, 983]], "bbox": [388, 954, 574, 983], "points": [[388, 955], [389, 954], [391, 954], [392, 955], [573, 955], [574, 956], [574, 982], [573, 983], [389, 983], [388, 982]], "type": "cell", "rows": [7, 7], "columns": [2, 2], "text_list": []}, {"location": [[1218, 923], [1303, 923], [1303, 950], [1218, 950]], "bbox": [1218, 923, 1303, 950], 
"points": [[1244, 925], [1245, 923], [1287, 923], [1288, 925], [1289, 923], [1291, 925], [1292, 923], [1293, 925], [1302, 925], [1303, 926], [1303, 949], [1302, 950], [1219, 950], [1218, 949], [1219, 948], [1218, 947], [1218, 926], [1219, 925]], "type": "cell", "rows": [6, 6], "columns": [10, 10], "text_list": []}, {"location": [[1129, 923], [1213, 923], [1213, 950], [1129, 950]], "bbox": [1129, 923, 1213, 950], "points": [[1129, 925], [1130, 923], [1132, 925], [1141, 925], [1142, 923], [1143, 925], [1144, 923], [1146, 925], [1147, 923], [1148, 925], [1149, 923], [1150, 925], [1151, 923], [1187, 923], [1188, 925], [1189, 923], [1190, 925], [1191, 923], [1192, 925], [1212, 925], [1213, 926], [1213, 949], [1212, 950], [1130, 950], [1129, 949]], "type": "cell", "rows": [6, 6], "columns": [9, 9], "text_list": []}, {"location": [[858, 923], [943, 923], [943, 950], [858, 950]], "bbox": [858, 923, 943, 950], "points": [[884, 925], [885, 923], [929, 923], [930, 925], [932, 923], [933, 925], [942, 925], [943, 926], [943, 949], [942, 950], [859, 950], [858, 949], [858, 926], [859, 925]], "type": "cell", "rows": [6, 6], "columns": [6, 6], "text_list": []}, {"location": [[769, 923], [853, 923], [853, 950], [769, 950]], "bbox": [769, 923, 853, 950], "points": [[790, 925], [791, 923], [792, 925], [794, 923], [824, 923], [825, 925], [826, 923], [828, 925], [829, 923], [830, 925], [831, 923], [832, 925], [833, 923], [835, 925], [836, 923], [837, 925], [852, 925], [853, 926], [853, 949], [852, 950], [770, 950], [769, 949], [769, 926], [770, 925]], "type": "cell", "rows": [6, 6], "columns": [5, 5], "text_list": []}, {"location": [[678, 923], [763, 923], [763, 950], [678, 950]], "bbox": [678, 923, 763, 950], "points": [[678, 925], [679, 923], [680, 925], [694, 925], [695, 923], [697, 925], [699, 925], [700, 923], [701, 925], [702, 923], [737, 923], [739, 925], [740, 923], [741, 925], [742, 923], [743, 925], [744, 923], [746, 925], [747, 923], [748, 925], [749, 923], [750, 925], [751, 
923], [753, 925], [762, 925], [763, 926], [763, 949], [762, 950], [679, 950], [678, 949]], "type": "cell", "rows": [6, 6], "columns": [4, 4], "text_list": []}, {"location": [[388, 923], [574, 923], [574, 950], [388, 950]], "bbox": [388, 923, 574, 950], "points": [[388, 925], [389, 923], [390, 925], [391, 923], [410, 923], [411, 925], [414, 925], [415, 923], [422, 923], [423, 925], [425, 925], [426, 923], [429, 923], [430, 925], [431, 923], [438, 923], [439, 925], [440, 925], [442, 923], [446, 923], [447, 925], [449, 925], [450, 923], [454, 923], [456, 925], [457, 923], [490, 923], [491, 925], [492, 923], [493, 925], [494, 923], [495, 925], [497, 923], [498, 925], [499, 923], [500, 925], [501, 923], [502, 925], [504, 923], [505, 925], [506, 923], [507, 925], [508, 923], [509, 925], [511, 923], [512, 925], [513, 923], [514, 925], [515, 923], [516, 925], [518, 923], [519, 925], [520, 923], [521, 925], [522, 923], [523, 925], [525, 923], [526, 925], [527, 923], [528, 925], [529, 923], [530, 925], [532, 923], [533, 925], [534, 923], [535, 925], [536, 923], [537, 925], [539, 923], [546, 923], [547, 925], [548, 923], [549, 925], [550, 923], [552, 925], [553, 923], [554, 925], [555, 923], [556, 925], [573, 925], [574, 926], [574, 949], [573, 950], [389, 950], [388, 949]], "type": "cell", "rows": [6, 6], "columns": [2, 2], "text_list": []}, {"location": [[240, 923], [383, 923], [383, 950], [240, 950]], "bbox": [240, 923, 383, 950], "points": [[240, 925], [242, 923], [243, 925], [244, 925], [245, 923], [260, 923], [261, 925], [263, 923], [264, 925], [267, 925], [268, 923], [274, 923], [276, 925], [278, 925], [279, 923], [281, 923], [283, 925], [284, 923], [291, 923], [292, 925], [293, 925], [294, 923], [300, 923], [301, 925], [302, 923], [321, 923], [322, 925], [323, 923], [325, 925], [326, 923], [327, 925], [382, 925], [383, 926], [383, 949], [382, 950], [242, 950], [240, 949]], "type": "cell", "rows": [6, 6], "columns": [1, 1], "text_list": []}, {"location": [[1218, 893], 
[1303, 893], [1303, 920], [1218, 920]], "bbox": [1218, 893, 1303, 920], "points": [[1218, 894], [1219, 893], [1302, 893], [1303, 894], [1303, 919], [1302, 920], [1219, 920], [1218, 919]], "type": "cell", "rows": [5, 5], "columns": [10, 10], "text_list": []}, {"location": [[1129, 893], [1213, 893], [1213, 920], [1129, 920]], "bbox": [1129, 893, 1213, 920], "points": [[1129, 894], [1130, 893], [1212, 893], [1213, 894], [1213, 919], [1212, 920], [1130, 920], [1129, 919]], "type": "cell", "rows": [5, 5], "columns": [9, 9], "text_list": []}, {"location": [[858, 893], [943, 893], [943, 920], [858, 920]], "bbox": [858, 893, 943, 920], "points": [[858, 894], [859, 893], [942, 893], [943, 894], [943, 919], [942, 920], [859, 920], [858, 919]], "type": "cell", "rows": [5, 5], "columns": [6, 6], "text_list": []}, {"location": [[769, 893], [853, 893], [853, 920], [769, 920]], "bbox": [769, 893, 853, 920], "points": [[769, 894], [770, 893], [852, 893], [853, 894], [853, 919], [852, 920], [770, 920], [769, 919]], "type": "cell", "rows": [5, 5], "columns": [5, 5], "text_list": []}, {"location": [[678, 893], [763, 893], [763, 920], [678, 920]], "bbox": [678, 893, 763, 920], "points": [[678, 894], [679, 893], [762, 893], [763, 894], [763, 919], [762, 920], [679, 920], [678, 919], [679, 918], [678, 916]], "type": "cell", "rows": [5, 5], "columns": [4, 4], "text_list": []}, {"location": [[388, 893], [574, 893], [574, 920], [388, 920]], "bbox": [388, 893, 574, 920], "points": [[388, 894], [389, 893], [573, 893], [574, 894], [574, 919], [573, 920], [389, 920], [388, 919]], "type": "cell", "rows": [5, 5], "columns": [2, 2], "text_list": []}, {"location": [[1218, 860], [1303, 860], [1303, 887], [1218, 887]], "bbox": [1218, 860, 1303, 887], "points": [[1244, 861], [1245, 860], [1246, 861], [1247, 860], [1287, 860], [1288, 861], [1289, 860], [1291, 861], [1302, 861], [1303, 863], [1303, 886], [1302, 887], [1219, 887], [1218, 886], [1218, 863], [1219, 861]], "type": "cell", "rows": [4, 4], 
"columns": [10, 10], "text_list": []}, {"location": [[1129, 860], [1213, 860], [1213, 887], [1129, 887]], "bbox": [1129, 860, 1213, 887], "points": [[1143, 861], [1144, 860], [1146, 861], [1147, 860], [1148, 861], [1149, 860], [1150, 861], [1151, 860], [1153, 861], [1154, 860], [1155, 861], [1156, 860], [1157, 861], [1158, 860], [1191, 860], [1192, 861], [1194, 860], [1195, 861], [1212, 861], [1213, 863], [1213, 886], [1212, 887], [1130, 887], [1129, 886], [1129, 863], [1130, 861]], "type": "cell", "rows": [4, 4], "columns": [9, 9], "text_list": []}, {"location": [[858, 860], [943, 860], [943, 887], [858, 887]], "bbox": [858, 860, 943, 887], "points": [[874, 861], [875, 860], [877, 861], [878, 860], [879, 861], [884, 861], [885, 860], [886, 861], [887, 860], [929, 860], [930, 861], [932, 860], [933, 861], [942, 861], [943, 863], [943, 886], [942, 887], [859, 887], [858, 886], [858, 863], [859, 861]], "type": "cell", "rows": [4, 4], "columns": [6, 6], "text_list": []}, {"location": [[769, 860], [853, 860], [853, 887], [769, 887]], "bbox": [769, 860, 853, 887], "points": [[795, 861], [796, 860], [797, 861], [798, 860], [831, 860], [832, 861], [833, 860], [835, 861], [852, 861], [853, 863], [853, 886], [852, 887], [770, 887], [769, 886], [769, 863], [770, 861]], "type": "cell", "rows": [4, 4], "columns": [5, 5], "text_list": []}, {"location": [[678, 860], [763, 860], [763, 887], [678, 887]], "bbox": [678, 860, 763, 887], "points": [[678, 861], [679, 860], [680, 861], [704, 861], [705, 860], [706, 861], [707, 860], [747, 860], [748, 861], [749, 860], [750, 861], [751, 860], [753, 861], [762, 861], [763, 863], [763, 886], [762, 887], [679, 887], [678, 886]], "type": "cell", "rows": [4, 4], "columns": [4, 4], "text_list": []}, {"location": [[388, 860], [574, 860], [574, 887], [388, 887]], "bbox": [388, 860, 574, 887], "points": [[388, 861], [389, 860], [392, 860], [394, 861], [411, 861], [412, 860], [414, 861], [415, 860], [416, 861], [417, 860], [548, 860], [549, 861], 
[550, 860], [552, 861], [553, 860], [554, 861], [555, 860], [556, 861], [557, 860], [559, 861], [560, 860], [561, 861], [573, 861], [574, 863], [574, 886], [573, 887], [389, 887], [388, 886]], "type": "cell", "rows": [4, 4], "columns": [2, 2], "text_list": []}, {"location": [[1218, 830], [1303, 830], [1303, 857], [1218, 857]], "bbox": [1218, 830, 1303, 857], "points": [[1218, 831], [1219, 830], [1302, 830], [1303, 831], [1303, 856], [1302, 857], [1219, 857], [1218, 856]], "type": "cell", "rows": [3, 3], "columns": [10, 10], "text_list": []}, {"location": [[1129, 830], [1213, 830], [1213, 857], [1129, 857]], "bbox": [1129, 830, 1213, 857], "points": [[1129, 831], [1130, 830], [1212, 830], [1213, 831], [1213, 856], [1212, 857], [1130, 857], [1129, 856]], "type": "cell", "rows": [3, 3], "columns": [9, 9], "text_list": []}, {"location": [[858, 830], [943, 830], [943, 857], [858, 857]], "bbox": [858, 830, 943, 857], "points": [[858, 831], [859, 830], [942, 830], [943, 831], [943, 856], [942, 857], [859, 857], [858, 856]], "type": "cell", "rows": [3, 3], "columns": [6, 6], "text_list": []}, {"location": [[769, 830], [853, 830], [853, 857], [769, 857]], "bbox": [769, 830, 853, 857], "points": [[769, 831], [770, 830], [852, 830], [853, 831], [853, 856], [852, 857], [770, 857], [769, 856]], "type": "cell", "rows": [3, 3], "columns": [5, 5], "text_list": []}, {"location": [[678, 830], [763, 830], [763, 857], [678, 857]], "bbox": [678, 830, 763, 857], "points": [[678, 831], [679, 830], [762, 830], [763, 831], [763, 856], [762, 857], [679, 857], [678, 856]], "type": "cell", "rows": [3, 3], "columns": [4, 4], "text_list": []}, {"location": [[388, 830], [574, 830], [574, 857], [388, 857]], "bbox": [388, 830, 574, 857], "points": [[388, 831], [389, 830], [573, 830], [574, 831], [574, 856], [573, 857], [389, 857], [388, 856]], "type": "cell", "rows": [3, 3], "columns": [2, 2], "text_list": []}, {"location": [[769, 798], [853, 798], [853, 824], [769, 824]], "bbox": [769, 798, 853, 
824], "points": [[769, 799], [770, 798], [852, 798], [853, 799], [853, 823], [852, 824], [770, 824], [769, 823]], "type": "cell", "rows": [2, 2], "columns": [5, 5], "text_list": []}, {"location": [[1218, 797], [1303, 797], [1303, 824], [1218, 824]], "bbox": [1218, 797, 1303, 824], "points": [[1244, 798], [1245, 797], [1246, 798], [1247, 797], [1249, 798], [1250, 797], [1275, 797], [1277, 798], [1278, 797], [1279, 798], [1302, 798], [1303, 799], [1303, 823], [1302, 824], [1219, 824], [1218, 823], [1218, 799], [1219, 798]], "type": "cell", "rows": [2, 2], "columns": [10, 10], "text_list": []}, {"location": [[1129, 797], [1213, 797], [1213, 824], [1129, 824]], "bbox": [1129, 797, 1213, 824], "points": [[1129, 798], [1130, 797], [1132, 798], [1146, 798], [1147, 797], [1148, 798], [1149, 797], [1150, 798], [1151, 797], [1153, 798], [1154, 797], [1155, 798], [1164, 798], [1165, 797], [1167, 798], [1168, 797], [1169, 798], [1170, 797], [1171, 798], [1173, 797], [1174, 798], [1212, 798], [1213, 799], [1213, 823], [1212, 824], [1130, 824], [1129, 823]], "type": "cell", "rows": [2, 2], "columns": [9, 9], "text_list": []}, {"location": [[858, 797], [943, 797], [943, 824], [858, 824]], "bbox": [858, 797, 943, 824], "points": [[888, 798], [889, 797], [891, 798], [892, 797], [893, 798], [894, 797], [895, 798], [942, 798], [943, 799], [943, 823], [942, 824], [859, 824], [858, 823], [858, 799], [859, 798]], "type": "cell", "rows": [2, 2], "columns": [6, 6], "text_list": []}, {"location": [[678, 797], [763, 797], [763, 824], [678, 824]], "bbox": [678, 797, 763, 824], "points": [[678, 798], [679, 797], [680, 798], [704, 798], [705, 797], [706, 798], [707, 797], [708, 798], [709, 797], [711, 798], [712, 797], [733, 797], [734, 798], [735, 797], [736, 798], [762, 798], [763, 799], [763, 823], [762, 824], [679, 824], [678, 823]], "type": "cell", "rows": [2, 2], "columns": [4, 4], "text_list": []}, {"location": [[388, 797], [574, 797], [574, 824], [388, 824]], "bbox": [388, 797, 574, 
824], "points": [[388, 798], [389, 797], [394, 797], [395, 798], [418, 798], [419, 797], [421, 798], [424, 798], [425, 797], [429, 797], [430, 798], [431, 797], [432, 798], [433, 797], [435, 798], [484, 798], [485, 797], [486, 798], [495, 798], [497, 797], [498, 798], [573, 798], [574, 799], [574, 823], [573, 824], [389, 824], [388, 823]], "type": "cell", "rows": [2, 2], "columns": [2, 2], "text_list": []}, {"location": [[240, 768], [383, 768], [383, 920], [240, 920]], "bbox": [240, 768, 383, 920], "points": [[240, 769], [242, 768], [382, 768], [383, 769], [383, 919], [382, 920], [242, 920], [240, 919]], "type": "cell", "rows": [1, 5], "columns": [1, 1], "text_list": []}, {"location": [[1218, 767], [1303, 767], [1303, 792], [1218, 792]], "bbox": [1218, 767, 1303, 792], "points": [[1218, 768], [1219, 767], [1220, 768], [1223, 768], [1224, 767], [1302, 767], [1303, 768], [1303, 791], [1302, 792], [1219, 792], [1218, 791]], "type": "cell", "rows": [1, 1], "columns": [10, 10], "text_list": []}, {"location": [[1128, 767], [1213, 767], [1213, 792], [1128, 792]], "bbox": [1128, 767, 1213, 792], "points": [[1130, 767], [1132, 768], [1133, 767], [1205, 767], [1206, 768], [1208, 767], [1209, 768], [1212, 768], [1213, 769], [1213, 791], [1212, 792], [1130, 792], [1129, 791], [1129, 773], [1128, 771], [1128, 769]], "type": "cell", "rows": [1, 1], "columns": [9, 9], "text_list": []}, {"location": [[1038, 767], [1123, 767], [1123, 1107], [1038, 1107]], "bbox": [1038, 767, 1123, 1107], "points": [[1038, 768], [1039, 767], [1040, 768], [1045, 768], [1046, 767], [1047, 768], [1049, 767], [1050, 768], [1051, 767], [1052, 768], [1053, 767], [1054, 768], [1068, 768], [1070, 767], [1071, 768], [1072, 767], [1073, 768], [1074, 767], [1075, 768], [1077, 767], [1078, 768], [1079, 767], [1080, 768], [1081, 767], [1082, 768], [1122, 768], [1123, 769], [1123, 1106], [1122, 1107], [1039, 1107], [1038, 1106]], "type": "cell", "rows": [1, 11], "columns": [8, 8], "text_list": []}, {"location": 
[[948, 767], [1033, 767], [1033, 1107], [948, 1107]], "bbox": [948, 767, 1033, 1107], "points": [[951, 768], [953, 767], [954, 768], [955, 767], [956, 768], [957, 767], [958, 768], [960, 767], [961, 768], [962, 767], [963, 768], [975, 768], [976, 767], [977, 768], [978, 767], [980, 768], [981, 767], [982, 768], [983, 767], [984, 768], [985, 767], [987, 768], [988, 767], [989, 768], [1032, 768], [1033, 769], [1033, 1106], [1032, 1107], [949, 1107], [948, 1106], [948, 1082], [949, 1081], [949, 1075], [948, 1074], [948, 1051], [949, 1050], [949, 1044], [948, 1043], [948, 1019], [949, 1018], [949, 1012], [948, 1011], [948, 988], [949, 987], [949, 982], [948, 981], [948, 956], [949, 955], [949, 950], [948, 949], [948, 926], [949, 925], [949, 919], [948, 918], [948, 894], [949, 893], [949, 887], [948, 886], [948, 863], [949, 861], [949, 856], [948, 854], [948, 831], [949, 830], [949, 824], [948, 823], [948, 799], [949, 798], [949, 792], [948, 791], [948, 769], [949, 768]], "type": "cell", "rows": [1, 11], "columns": [7, 7], "text_list": []}, {"location": [[858, 767], [943, 767], [943, 792], [858, 792]], "bbox": [858, 767, 943, 792], "points": [[858, 768], [859, 767], [860, 768], [861, 768], [863, 767], [936, 767], [937, 768], [939, 767], [940, 768], [941, 767], [943, 769], [943, 791], [942, 792], [859, 792], [858, 791]], "type": "cell", "rows": [1, 1], "columns": [6, 6], "text_list": []}, {"location": [[769, 767], [853, 767], [853, 792], [769, 792]], "bbox": [769, 767, 853, 792], "points": [[769, 768], [770, 767], [771, 768], [773, 767], [850, 767], [851, 768], [852, 768], [853, 769], [853, 791], [852, 792], [770, 792], [769, 791]], "type": "cell", "rows": [1, 1], "columns": [5, 5], "text_list": []}, {"location": [[678, 767], [763, 767], [763, 792], [678, 792]], "bbox": [678, 767, 763, 792], "points": [[678, 768], [679, 767], [680, 768], [682, 768], [684, 767], [761, 767], [763, 769], [763, 791], [762, 792], [679, 792], [678, 791]], "type": "cell", "rows": [1, 1], 
"columns": [4, 4], "text_list": []}, {"location": [[577, 767], [674, 767], [674, 1013], [577, 1013]], "bbox": [577, 767, 674, 1013], "points": [[624, 768], [625, 767], [626, 768], [628, 767], [630, 767], [631, 768], [632, 767], [633, 768], [635, 767], [636, 768], [673, 768], [674, 769], [673, 770], [673, 1012], [672, 1013], [578, 1013], [577, 1012], [578, 1011], [578, 988], [580, 987], [580, 983], [578, 982], [578, 955], [580, 954], [580, 950], [578, 949], [578, 925], [580, 923], [580, 920], [578, 919], [578, 893], [580, 892], [580, 888], [578, 887], [578, 861], [580, 860], [580, 857], [578, 856], [578, 830], [580, 829], [580, 824], [578, 823], [578, 798], [580, 797], [580, 792], [578, 791], [578, 769], [580, 768]], "type": "cell", "rows": [1, 8], "columns": [3, 3], "text_list": []}, {"location": [[388, 767], [574, 767], [574, 792], [388, 792]], "bbox": [388, 767, 574, 792], "points": [[388, 768], [389, 767], [390, 768], [391, 767], [562, 767], [563, 768], [564, 767], [566, 768], [573, 768], [574, 769], [574, 791], [573, 792], [389, 792], [388, 791]], "type": "cell", "rows": [1, 1], "columns": [2, 2], "text_list": []}, {"location": [[171, 767], [237, 767], [237, 1013], [171, 1013]], "bbox": [171, 767, 237, 1013], "points": [[171, 768], [173, 767], [174, 768], [236, 768], [237, 769], [237, 770], [236, 771], [236, 833], [237, 835], [236, 836], [237, 837], [236, 838], [237, 839], [236, 840], [237, 842], [236, 843], [237, 844], [236, 845], [236, 929], [237, 930], [237, 937], [236, 939], [236, 962], [237, 963], [237, 966], [236, 967], [237, 968], [236, 969], [237, 970], [236, 971], [236, 1009], [237, 1010], [237, 1012], [236, 1013], [173, 1013], [171, 1012]], "type": "cell", "rows": [1, 8], "columns": [0, 0], "text_list": []}, {"location": [[1218, 734], [1303, 734], [1303, 761], [1218, 761]], "bbox": [1218, 734, 1303, 761], "points": [[1218, 735], [1219, 734], [1220, 735], [1225, 735], [1226, 734], [1257, 734], [1258, 735], [1259, 734], [1302, 734], [1303, 735], [1303, 
760], [1302, 761], [1219, 761], [1218, 760]], "type": "cell", "rows": [0, 0], "columns": [10, 10], "text_list": []}, {"location": [[1128, 734], [1213, 734], [1213, 761], [1128, 761]], "bbox": [1128, 734, 1213, 761], "points": [[1130, 734], [1132, 735], [1134, 735], [1135, 734], [1165, 734], [1167, 735], [1168, 735], [1169, 734], [1175, 734], [1176, 735], [1177, 735], [1178, 734], [1189, 734], [1190, 735], [1198, 735], [1199, 734], [1203, 734], [1204, 735], [1211, 735], [1212, 734], [1213, 735], [1213, 760], [1212, 761], [1129, 761], [1128, 760], [1128, 736]], "type": "cell", "rows": [0, 0], "columns": [9, 9], "text_list": []}, {"location": [[1038, 734], [1123, 734], [1123, 761], [1038, 761]], "bbox": [1038, 734, 1123, 761], "points": [[1038, 735], [1039, 734], [1040, 735], [1045, 735], [1046, 734], [1047, 735], [1050, 735], [1051, 734], [1056, 734], [1057, 735], [1058, 734], [1063, 734], [1064, 735], [1065, 734], [1075, 734], [1077, 735], [1078, 735], [1079, 734], [1084, 734], [1085, 735], [1088, 735], [1089, 734], [1098, 734], [1099, 735], [1104, 735], [1105, 734], [1106, 735], [1111, 735], [1112, 734], [1114, 734], [1115, 735], [1122, 735], [1123, 736], [1123, 760], [1122, 761], [1039, 761], [1038, 760]], "type": "cell", "rows": [0, 0], "columns": [8, 8], "text_list": []}, {"location": [[948, 734], [1033, 734], [1033, 761], [948, 761]], "bbox": [948, 734, 1033, 761], "points": [[954, 735], [955, 734], [957, 734], [958, 735], [960, 734], [964, 734], [966, 735], [968, 735], [969, 734], [971, 734], [973, 735], [974, 734], [975, 735], [976, 734], [978, 734], [980, 735], [981, 734], [985, 734], [987, 735], [989, 735], [990, 734], [995, 734], [996, 735], [997, 735], [998, 734], [1016, 734], [1017, 735], [1018, 734], [1019, 735], [1020, 734], [1023, 734], [1024, 735], [1025, 734], [1026, 735], [1027, 734], [1029, 735], [1030, 734], [1031, 735], [1032, 734], [1033, 735], [1033, 760], [1032, 761], [949, 761], [948, 760], [948, 736], [949, 735]], "type": "cell", "rows": 
[0, 0], "columns": [7, 7], "text_list": []}, {"location": [[858, 734], [943, 734], [943, 761], [858, 761]], "bbox": [858, 734, 943, 761], "points": [[858, 735], [859, 734], [860, 735], [863, 735], [864, 734], [865, 735], [866, 734], [878, 734], [879, 735], [886, 735], [887, 734], [889, 734], [891, 735], [895, 735], [897, 734], [898, 735], [900, 735], [901, 734], [902, 735], [905, 735], [906, 734], [934, 734], [935, 735], [940, 735], [941, 734], [943, 736], [943, 760], [942, 761], [859, 761], [858, 760]], "type": "cell", "rows": [0, 0], "columns": [6, 6], "text_list": []}, {"location": [[769, 734], [853, 734], [853, 761], [769, 761]], "bbox": [769, 734, 853, 761], "points": [[769, 735], [770, 734], [771, 735], [773, 734], [774, 735], [775, 734], [776, 735], [777, 734], [787, 734], [788, 735], [789, 734], [790, 735], [791, 734], [792, 735], [796, 735], [797, 734], [799, 734], [801, 735], [804, 735], [805, 734], [806, 735], [811, 735], [812, 734], [813, 735], [815, 734], [845, 734], [846, 735], [847, 734], [849, 735], [851, 735], [852, 734], [853, 735], [853, 760], [852, 761], [770, 761], [769, 760]], "type": "cell", "rows": [0, 0], "columns": [5, 5], "text_list": []}, {"location": [[678, 734], [763, 734], [763, 761], [678, 761]], "bbox": [678, 734, 763, 761], "points": [[678, 735], [679, 734], [680, 735], [685, 735], [686, 734], [698, 734], [699, 735], [706, 735], [707, 734], [709, 734], [711, 735], [715, 735], [716, 734], [718, 735], [720, 735], [721, 734], [759, 734], [760, 735], [761, 734], [762, 734], [763, 735], [763, 760], [762, 761], [679, 761], [678, 760]], "type": "cell", "rows": [0, 0], "columns": [4, 4], "text_list": []}, {"location": [[577, 734], [674, 734], [674, 761], [577, 761]], "bbox": [577, 734, 674, 761], "points": [[605, 735], [606, 734], [621, 734], [622, 735], [623, 734], [635, 734], [636, 735], [638, 735], [639, 734], [640, 735], [642, 734], [643, 735], [673, 735], [674, 736], [674, 760], [673, 761], [578, 761], [577, 760], [577, 736], [578, 
735]], "type": "cell", "rows": [0, 0], "columns": [3, 3], "text_list": []}, {"location": [[171, 734], [574, 734], [574, 761], [171, 761]], "bbox": [171, 734, 574, 761], "points": [[196, 735], [197, 734], [198, 735], [199, 734], [214, 734], [215, 735], [216, 734], [217, 735], [218, 734], [219, 735], [573, 735], [574, 736], [574, 760], [573, 761], [173, 761], [171, 760], [171, 736], [173, 735]], "type": "cell", "rows": [0, 0], "columns": [0, 2], "text_list": []}, {"location": [[166, 729], [1309, 729], [1309, 1113], [166, 1113]], "bbox": [166, 729, 1309, 1113], "points": [[167, 729], [167, 788], [166, 789], [167, 790], [167, 792], [166, 794], [167, 795], [167, 1082], [166, 1084], [167, 1085], [167, 1113], [1308, 1113], [1309, 1112], [1309, 1107], [1308, 1106], [1308, 1101], [1309, 1100], [1308, 1099], [1309, 1098], [1308, 1096], [1309, 1095], [1309, 1075], [1308, 1074], [1308, 1073], [1309, 1072], [1308, 1071], [1309, 1070], [1309, 1056], [1308, 1054], [1309, 1053], [1308, 1052], [1309, 1051], [1309, 1044], [1308, 1043], [1309, 1042], [1308, 1040], [1309, 1039], [1308, 1038], [1309, 1037], [1309, 1020], [1308, 1019], [1309, 1018], [1309, 1011], [1308, 1010], [1309, 1009], [1309, 950], [1308, 949], [1309, 948], [1308, 947], [1309, 946], [1309, 929], [1308, 928], [1309, 927], [1308, 926], [1309, 925], [1309, 919], [1308, 918], [1308, 916], [1309, 915], [1309, 897], [1308, 895], [1309, 894], [1309, 887], [1308, 886], [1309, 885], [1308, 884], [1309, 882], [1308, 881], [1309, 880], [1309, 864], [1308, 863], [1309, 861], [1309, 856], [1308, 854], [1308, 853], [1309, 852], [1309, 836], [1308, 835], [1309, 833], [1308, 832], [1309, 831], [1309, 824], [1308, 823], [1309, 822], [1308, 820], [1309, 819], [1309, 792], [1308, 791], [1308, 790], [1309, 789], [1309, 773], [1308, 771], [1308, 769], [1309, 768], [1309, 761], [1308, 760], [1308, 736], [1309, 735], [1309, 730], [171, 730], [170, 729]], "type": "table", "contains": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84]}, {"location": [[1213, 549], [1298, 549], [1298, 576], [1213, 576]], "bbox": [1213, 549, 1298, 576], "points": [[1213, 550], [1215, 549], [1296, 549], [1298, 550], [1298, 575], [1296, 576], [1215, 576], [1213, 575]], "type": "cell", "rows": [10, 10], "columns": [10, 10], "text_list": []}, {"location": [[1123, 549], [1209, 549], [1209, 576], [1123, 576]], "bbox": [1123, 549, 1209, 576], "points": [[1123, 550], [1125, 549], [1208, 549], [1209, 550], [1209, 575], [1208, 576], [1125, 576], [1123, 575]], "type": "cell", "rows": [10, 10], "columns": [9, 9], "text_list": []}, {"location": [[1032, 549], [1118, 549], [1118, 576], [1032, 576]], "bbox": [1032, 549, 1118, 576], "points": [[1035, 549], [1116, 549], [1118, 550], [1118, 575], [1116, 576], [1035, 576], [1032, 574], [1032, 568], [1033, 567], [1032, 566], [1033, 564], [1032, 563], [1033, 562], [1032, 561], [1033, 560], [1032, 559], [1033, 557], [1033, 553], [1032, 552]], "type": "cell", "rows": [10, 10], "columns": [8, 8], "text_list": []}, {"location": [[942, 549], [1027, 549], [1027, 576], [942, 576]], "bbox": [942, 549, 1027, 576], "points": [[942, 550], [943, 549], [1026, 549], [1027, 550], [1027, 575], [1026, 576], [943, 576], [942, 575]], "type": "cell", "rows": [10, 10], "columns": [7, 7], "text_list": []}, {"location": [[852, 549], [937, 549], [937, 576], [852, 576]], "bbox": [852, 549, 937, 576], "points": [[853, 550], [854, 549], [936, 549], [937, 550], [937, 575], [936, 576], [854, 576], [852, 574], [853, 573]], "type": "cell", "rows": [10, 10], "columns": [6, 6], "text_list": []}, {"location": [[762, 549], [847, 549], [847, 576], [762, 576]], "bbox": [762, 549, 847, 576], "points": [[763, 550], [764, 549], [846, 549], [847, 550], 
[847, 575], [846, 576], [763, 576], [762, 575], [763, 574]], "type": "cell", "rows": [10, 10], "columns": [5, 5], "text_list": []}, {"location": [[673, 549], [759, 549], [759, 576], [673, 576]], "bbox": [673, 549, 759, 576], "points": [[673, 550], [674, 549], [756, 549], [759, 552], [757, 553], [759, 554], [759, 574], [756, 576], [674, 576], [673, 575]], "type": "cell", "rows": [10, 10], "columns": [4, 4], "text_list": []}, {"location": [[233, 549], [569, 549], [569, 576], [233, 576]], "bbox": [233, 549, 569, 576], "points": [[235, 550], [236, 549], [568, 549], [569, 550], [569, 575], [568, 576], [235, 576], [233, 575], [235, 574]], "type": "cell", "rows": [10, 10], "columns": [1, 2], "text_list": []}, {"location": [[1213, 519], [1298, 519], [1298, 544], [1213, 544]], "bbox": [1213, 519, 1298, 544], "points": [[1213, 520], [1215, 519], [1296, 519], [1298, 520], [1298, 543], [1296, 544], [1215, 544], [1213, 543]], "type": "cell", "rows": [9, 9], "columns": [10, 10], "text_list": []}, {"location": [[1123, 519], [1209, 519], [1209, 544], [1123, 544]], "bbox": [1123, 519, 1209, 544], "points": [[1123, 520], [1125, 519], [1208, 519], [1209, 520], [1209, 543], [1208, 544], [1125, 544], [1123, 543]], "type": "cell", "rows": [9, 9], "columns": [9, 9], "text_list": []}, {"location": [[1032, 519], [1118, 519], [1118, 544], [1032, 544]], "bbox": [1032, 519, 1118, 544], "points": [[1035, 519], [1116, 519], [1118, 520], [1118, 543], [1116, 544], [1035, 544], [1032, 542], [1033, 541], [1033, 527], [1032, 526], [1033, 525], [1032, 523], [1033, 522], [1032, 521]], "type": "cell", "rows": [9, 9], "columns": [8, 8], "text_list": []}, {"location": [[942, 519], [1027, 519], [1027, 544], [942, 544]], "bbox": [942, 519, 1027, 544], "points": [[942, 520], [943, 519], [1026, 519], [1027, 520], [1027, 543], [1026, 544], [944, 544], [942, 542], [943, 541], [942, 540], [943, 539], [942, 537], [943, 536], [942, 535], [943, 534], [942, 533]], "type": "cell", "rows": [9, 9], "columns": [7, 7], 
"text_list": []}, {"location": [[853, 519], [937, 519], [937, 544], [853, 544]], "bbox": [853, 519, 937, 544], "points": [[853, 520], [854, 519], [936, 519], [937, 520], [937, 543], [936, 544], [854, 544], [853, 543]], "type": "cell", "rows": [9, 9], "columns": [6, 6], "text_list": []}, {"location": [[763, 519], [847, 519], [847, 544], [763, 544]], "bbox": [763, 519, 847, 544], "points": [[763, 520], [764, 519], [846, 519], [847, 520], [847, 543], [846, 544], [764, 544], [763, 543]], "type": "cell", "rows": [9, 9], "columns": [5, 5], "text_list": []}, {"location": [[673, 519], [759, 519], [759, 544], [673, 544]], "bbox": [673, 519, 759, 544], "points": [[673, 520], [674, 519], [756, 519], [759, 521], [757, 522], [759, 523], [759, 540], [757, 541], [757, 543], [756, 544], [674, 544], [673, 543]], "type": "cell", "rows": [9, 9], "columns": [4, 4], "text_list": []}, {"location": [[383, 519], [569, 519], [569, 544], [383, 544]], "bbox": [383, 519, 569, 544], "points": [[383, 520], [384, 519], [568, 519], [569, 520], [569, 543], [568, 544], [384, 544], [383, 543]], "type": "cell", "rows": [9, 9], "columns": [2, 2], "text_list": []}, {"location": [[233, 519], [377, 519], [377, 544], [233, 544]], "bbox": [233, 519, 377, 544], "points": [[233, 520], [235, 519], [376, 519], [377, 520], [377, 543], [376, 544], [236, 544], [235, 543], [235, 540], [233, 539], [233, 537], [235, 536], [233, 535], [233, 523], [235, 522], [233, 521]], "type": "cell", "rows": [9, 9], "columns": [1, 1], "text_list": []}, {"location": [[1213, 488], [1298, 488], [1298, 515], [1213, 515]], "bbox": [1213, 488, 1298, 515], "points": [[1213, 490], [1215, 488], [1216, 490], [1218, 490], [1219, 488], [1220, 490], [1222, 488], [1296, 488], [1298, 490], [1298, 514], [1296, 515], [1215, 515], [1213, 514]], "type": "cell", "rows": [8, 8], "columns": [10, 10], "text_list": []}, {"location": [[1123, 488], [1209, 488], [1209, 515], [1123, 515]], "bbox": [1123, 488, 1209, 515], "points": [[1123, 490], [1125, 488], 
[1126, 490], [1132, 490], [1133, 488], [1194, 488], [1195, 490], [1196, 488], [1197, 490], [1198, 488], [1199, 490], [1202, 490], [1203, 488], [1204, 490], [1205, 488], [1206, 490], [1208, 490], [1209, 491], [1209, 514], [1208, 515], [1125, 515], [1123, 514]], "type": "cell", "rows": [8, 8], "columns": [9, 9], "text_list": []}, {"location": [[1032, 488], [1118, 488], [1118, 515], [1032, 515]], "bbox": [1032, 488, 1118, 515], "points": [[1035, 488], [1036, 490], [1037, 488], [1038, 490], [1039, 488], [1040, 490], [1042, 488], [1043, 490], [1044, 488], [1107, 488], [1108, 490], [1109, 488], [1111, 490], [1112, 488], [1116, 488], [1118, 490], [1118, 514], [1116, 515], [1035, 515], [1033, 514], [1033, 513], [1032, 512], [1033, 511], [1032, 509], [1033, 508], [1033, 497], [1032, 495], [1033, 494], [1032, 493], [1032, 491]], "type": "cell", "rows": [8, 8], "columns": [8, 8], "text_list": []}, {"location": [[942, 488], [1027, 488], [1027, 515], [942, 515]], "bbox": [942, 488, 1027, 515], "points": [[942, 490], [943, 488], [944, 488], [946, 490], [949, 490], [950, 488], [951, 490], [953, 488], [1016, 488], [1017, 490], [1018, 488], [1019, 490], [1020, 488], [1022, 490], [1026, 490], [1027, 491], [1027, 514], [1026, 515], [943, 515], [942, 514], [943, 513], [942, 512]], "type": "cell", "rows": [8, 8], "columns": [7, 7], "text_list": []}, {"location": [[853, 488], [937, 488], [937, 515], [853, 515]], "bbox": [853, 488, 937, 515], "points": [[853, 490], [854, 488], [856, 490], [860, 490], [861, 488], [863, 490], [864, 488], [929, 488], [930, 490], [932, 488], [933, 490], [934, 488], [935, 490], [936, 490], [937, 491], [937, 514], [936, 515], [854, 515], [853, 514]], "type": "cell", "rows": [8, 8], "columns": [6, 6], "text_list": []}, {"location": [[762, 488], [847, 488], [847, 515], [762, 515]], "bbox": [762, 488, 847, 515], "points": [[763, 490], [764, 488], [766, 490], [771, 490], [773, 488], [774, 490], [775, 488], [838, 488], [839, 490], [840, 488], [842, 490], [843, 
488], [844, 490], [845, 488], [847, 491], [847, 514], [846, 515], [763, 515], [762, 514], [763, 513]], "type": "cell", "rows": [8, 8], "columns": [5, 5], "text_list": []}, {"location": [[673, 488], [759, 488], [759, 515], [673, 515]], "bbox": [673, 488, 759, 515], "points": [[673, 490], [674, 488], [679, 488], [680, 490], [681, 488], [682, 490], [684, 488], [751, 488], [753, 490], [754, 488], [755, 490], [757, 490], [759, 491], [759, 511], [757, 512], [757, 514], [756, 515], [674, 515], [673, 514]], "type": "cell", "rows": [8, 8], "columns": [4, 4], "text_list": []}, {"location": [[383, 488], [569, 488], [569, 515], [383, 515]], "bbox": [383, 488, 569, 515], "points": [[383, 490], [384, 488], [475, 488], [477, 490], [478, 488], [479, 490], [480, 488], [481, 490], [483, 488], [484, 490], [485, 488], [486, 490], [487, 488], [488, 490], [490, 488], [491, 490], [492, 488], [501, 488], [502, 490], [504, 488], [505, 490], [506, 488], [507, 490], [508, 488], [509, 490], [511, 488], [512, 490], [513, 488], [550, 488], [552, 490], [553, 488], [554, 490], [555, 488], [556, 490], [557, 488], [567, 488], [569, 491], [569, 514], [568, 515], [384, 515], [383, 514]], "type": "cell", "rows": [8, 8], "columns": [2, 2], "text_list": []}, {"location": [[233, 488], [377, 488], [377, 515], [233, 515]], "bbox": [233, 488, 377, 515], "points": [[233, 490], [235, 488], [356, 488], [357, 490], [359, 488], [360, 490], [361, 488], [362, 490], [363, 488], [364, 490], [366, 488], [367, 490], [368, 488], [369, 490], [370, 488], [371, 490], [373, 488], [374, 490], [375, 488], [376, 488], [377, 490], [377, 514], [376, 515], [236, 515], [235, 514], [235, 511], [233, 509]], "type": "cell", "rows": [8, 8], "columns": [1, 1], "text_list": []}, {"location": [[1212, 457], [1298, 457], [1298, 484], [1212, 484]], "bbox": [1212, 457, 1298, 484], "points": [[1212, 458], [1213, 457], [1296, 457], [1298, 458], [1298, 483], [1296, 484], [1215, 484], [1213, 483], [1213, 459]], "type": "cell", "rows": [7, 7], 
"columns": [10, 10], "text_list": []}, {"location": [[1123, 457], [1209, 457], [1209, 484], [1123, 484]], "bbox": [1123, 457, 1209, 484], "points": [[1123, 458], [1125, 457], [1208, 457], [1209, 458], [1209, 483], [1208, 484], [1125, 484], [1123, 483]], "type": "cell", "rows": [7, 7], "columns": [9, 9], "text_list": []}, {"location": [[1032, 457], [1118, 457], [1118, 484], [1032, 484]], "bbox": [1032, 457, 1118, 484], "points": [[1032, 458], [1033, 457], [1116, 457], [1118, 458], [1118, 483], [1116, 484], [1033, 484], [1032, 483], [1032, 479], [1033, 478], [1032, 477], [1033, 475], [1032, 474], [1033, 473], [1032, 472], [1033, 471], [1032, 470], [1033, 468], [1032, 467]], "type": "cell", "rows": [7, 7], "columns": [8, 8], "text_list": []}, {"location": [[942, 457], [1027, 457], [1027, 484], [942, 484]], "bbox": [942, 457, 1027, 484], "points": [[942, 458], [943, 457], [1026, 457], [1027, 458], [1027, 483], [1026, 484], [943, 484], [942, 483]], "type": "cell", "rows": [7, 7], "columns": [7, 7], "text_list": []}, {"location": [[852, 457], [937, 457], [937, 484], [852, 484]], "bbox": [852, 457, 937, 484], "points": [[852, 458], [853, 457], [936, 457], [937, 458], [937, 483], [936, 484], [854, 484], [853, 483], [853, 461], [852, 460]], "type": "cell", "rows": [7, 7], "columns": [6, 6], "text_list": []}, {"location": [[762, 457], [847, 457], [847, 484], [762, 484]], "bbox": [762, 457, 847, 484], "points": [[762, 458], [763, 457], [846, 457], [847, 458], [847, 483], [846, 484], [764, 484], [763, 483], [763, 459]], "type": "cell", "rows": [7, 7], "columns": [5, 5], "text_list": []}, {"location": [[673, 457], [759, 457], [759, 484], [673, 484]], "bbox": [673, 457, 759, 484], "points": [[673, 458], [674, 457], [757, 457], [759, 458], [759, 483], [757, 484], [674, 484], [673, 483]], "type": "cell", "rows": [7, 7], "columns": [4, 4], "text_list": []}, {"location": [[383, 457], [569, 457], [569, 484], [383, 484]], "bbox": [383, 457, 569, 484], "points": [[383, 458], [384, 
457], [568, 457], [569, 458], [569, 483], [568, 484], [384, 484], [383, 483]], "type": "cell", "rows": [7, 7], "columns": [2, 2], "text_list": []}, {"location": [[233, 457], [377, 457], [377, 484], [233, 484]], "bbox": [233, 457, 377, 484], "points": [[233, 458], [235, 457], [376, 457], [377, 458], [377, 483], [376, 484], [236, 484], [235, 483], [235, 475], [233, 474], [233, 460], [235, 459]], "type": "cell", "rows": [7, 7], "columns": [1, 1], "text_list": []}, {"location": [[1213, 426], [1298, 426], [1298, 452], [1213, 452]], "bbox": [1213, 426, 1298, 452], "points": [[1213, 428], [1215, 426], [1296, 426], [1298, 428], [1298, 451], [1296, 452], [1215, 452], [1213, 451]], "type": "cell", "rows": [6, 6], "columns": [10, 10], "text_list": []}, {"location": [[1123, 426], [1209, 426], [1209, 452], [1123, 452]], "bbox": [1123, 426, 1209, 452], "points": [[1123, 428], [1125, 426], [1206, 426], [1209, 429], [1209, 451], [1208, 452], [1125, 452], [1123, 451]], "type": "cell", "rows": [6, 6], "columns": [9, 9], "text_list": []}, {"location": [[1032, 426], [1118, 426], [1118, 452], [1032, 452]], "bbox": [1032, 426, 1118, 452], "points": [[1032, 428], [1033, 426], [1116, 426], [1118, 428], [1118, 451], [1116, 452], [1035, 452], [1032, 450]], "type": "cell", "rows": [6, 6], "columns": [8, 8], "text_list": []}, {"location": [[942, 426], [1027, 426], [1027, 452], [942, 452]], "bbox": [942, 426, 1027, 452], "points": [[942, 428], [943, 426], [1026, 426], [1027, 428], [1027, 451], [1026, 452], [943, 452], [942, 451]], "type": "cell", "rows": [6, 6], "columns": [7, 7], "text_list": []}, {"location": [[852, 426], [937, 426], [937, 452], [852, 452]], "bbox": [852, 426, 937, 452], "points": [[852, 428], [853, 426], [936, 426], [937, 428], [937, 451], [936, 452], [854, 452], [853, 451], [853, 431], [852, 430], [853, 429]], "type": "cell", "rows": [6, 6], "columns": [6, 6], "text_list": []}, {"location": [[762, 426], [847, 426], [847, 452], [762, 452]], "bbox": [762, 426, 847, 452], 
"points": [[762, 428], [763, 426], [846, 426], [847, 428], [847, 451], [846, 452], [764, 452], [763, 451], [763, 429]], "type": "cell", "rows": [6, 6], "columns": [5, 5], "text_list": []}, {"location": [[673, 426], [759, 426], [759, 452], [673, 452]], "bbox": [673, 426, 759, 452], "points": [[673, 428], [674, 426], [756, 426], [759, 429], [759, 451], [757, 452], [674, 452], [673, 451]], "type": "cell", "rows": [6, 6], "columns": [4, 4], "text_list": []}, {"location": [[383, 426], [569, 426], [569, 452], [383, 452]], "bbox": [383, 426, 569, 452], "points": [[383, 428], [384, 426], [568, 426], [569, 428], [569, 451], [568, 452], [384, 452], [383, 451]], "type": "cell", "rows": [6, 6], "columns": [2, 2], "text_list": []}, {"location": [[233, 426], [377, 426], [377, 452], [233, 452]], "bbox": [233, 426, 377, 452], "points": [[233, 428], [235, 426], [376, 426], [377, 428], [377, 451], [376, 452], [236, 452], [233, 450]], "type": "cell", "rows": [6, 6], "columns": [1, 1], "text_list": []}, {"location": [[1213, 395], [1298, 395], [1298, 422], [1213, 422]], "bbox": [1213, 395, 1298, 422], "points": [[1244, 396], [1245, 395], [1273, 395], [1274, 396], [1275, 395], [1277, 396], [1296, 396], [1298, 397], [1298, 421], [1296, 422], [1215, 422], [1213, 421], [1213, 397], [1215, 396]], "type": "cell", "rows": [5, 5], "columns": [10, 10], "text_list": []}, {"location": [[1123, 395], [1209, 395], [1209, 422], [1123, 422]], "bbox": [1123, 395, 1209, 422], "points": [[1153, 396], [1154, 395], [1180, 395], [1181, 396], [1208, 396], [1209, 397], [1209, 421], [1208, 422], [1125, 422], [1123, 421], [1123, 397], [1125, 396]], "type": "cell", "rows": [5, 5], "columns": [9, 9], "text_list": []}, {"location": [[1032, 395], [1118, 395], [1118, 422], [1032, 422]], "bbox": [1032, 395, 1118, 422], "points": [[1064, 396], [1065, 395], [1088, 395], [1089, 396], [1091, 395], [1092, 396], [1116, 396], [1118, 397], [1118, 421], [1116, 422], [1035, 422], [1032, 419], [1032, 414], [1033, 412], [1032, 
411], [1033, 410], [1032, 409], [1033, 408], [1032, 406], [1032, 397], [1033, 396]], "type": "cell", "rows": [5, 5], "columns": [8, 8], "text_list": []}, {"location": [[942, 395], [1027, 395], [1027, 422], [942, 422]], "bbox": [942, 395, 1027, 422], "points": [[973, 396], [974, 395], [997, 395], [998, 396], [999, 395], [1001, 396], [1026, 396], [1027, 397], [1027, 421], [1026, 422], [943, 422], [942, 421], [942, 397], [943, 396]], "type": "cell", "rows": [5, 5], "columns": [7, 7], "text_list": []}, {"location": [[852, 395], [937, 395], [937, 422], [852, 422]], "bbox": [852, 395, 937, 422], "points": [[884, 396], [885, 395], [911, 395], [912, 396], [913, 395], [914, 396], [936, 396], [937, 397], [937, 421], [936, 422], [854, 422], [853, 421], [853, 398], [852, 397], [853, 396]], "type": "cell", "rows": [5, 5], "columns": [6, 6], "text_list": []}, {"location": [[762, 395], [847, 395], [847, 422], [762, 422]], "bbox": [762, 395, 847, 422], "points": [[792, 396], [794, 395], [819, 395], [820, 396], [822, 395], [823, 396], [846, 396], [847, 397], [847, 421], [846, 422], [763, 422], [762, 421], [763, 419], [763, 398], [762, 397], [763, 396]], "type": "cell", "rows": [5, 5], "columns": [5, 5], "text_list": []}, {"location": [[673, 395], [759, 395], [759, 422], [673, 422]], "bbox": [673, 395, 759, 422], "points": [[673, 396], [674, 395], [675, 396], [701, 396], [702, 395], [737, 395], [739, 396], [757, 396], [759, 397], [759, 419], [756, 422], [674, 422], [673, 421]], "type": "cell", "rows": [5, 5], "columns": [4, 4], "text_list": []}, {"location": [[383, 395], [569, 395], [569, 422], [383, 422]], "bbox": [383, 395, 569, 422], "points": [[383, 396], [384, 395], [394, 395], [395, 396], [396, 395], [397, 396], [402, 396], [403, 395], [404, 396], [405, 396], [406, 395], [422, 395], [423, 396], [444, 396], [445, 395], [457, 395], [458, 396], [459, 395], [460, 396], [465, 396], [466, 395], [471, 395], [472, 396], [473, 395], [474, 396], [477, 396], [478, 395], [480, 395], [481, 
396], [483, 395], [487, 395], [488, 396], [568, 396], [569, 397], [569, 421], [568, 422], [384, 422], [383, 421]], "type": "cell", "rows": [5, 5], "columns": [2, 2], "text_list": []}, {"location": [[1213, 364], [1298, 364], [1298, 391], [1213, 391]], "bbox": [1213, 364, 1298, 391], "points": [[1213, 366], [1215, 364], [1216, 366], [1217, 364], [1296, 364], [1298, 366], [1298, 390], [1296, 391], [1215, 391], [1213, 390]], "type": "cell", "rows": [4, 4], "columns": [10, 10], "text_list": []}, {"location": [[1123, 364], [1209, 364], [1209, 391], [1123, 391]], "bbox": [1123, 364, 1209, 391], "points": [[1123, 366], [1125, 364], [1129, 364], [1130, 366], [1132, 364], [1206, 364], [1209, 367], [1209, 390], [1208, 391], [1125, 391], [1123, 390]], "type": "cell", "rows": [4, 4], "columns": [9, 9], "text_list": []}, {"location": [[1032, 364], [1118, 364], [1118, 391], [1032, 391]], "bbox": [1032, 364, 1118, 391], "points": [[1035, 364], [1036, 366], [1037, 364], [1116, 364], [1118, 366], [1118, 390], [1116, 391], [1035, 391], [1032, 389], [1032, 385], [1033, 384], [1032, 383], [1033, 382], [1032, 381], [1033, 380], [1033, 375], [1032, 374], [1033, 373], [1032, 371], [1033, 370], [1032, 369], [1032, 367]], "type": "cell", "rows": [4, 4], "columns": [8, 8], "text_list": []}, {"location": [[942, 364], [1027, 364], [1027, 391], [942, 391]], "bbox": [942, 364, 1027, 391], "points": [[942, 366], [943, 364], [944, 364], [946, 366], [947, 364], [1023, 364], [1024, 366], [1025, 364], [1026, 364], [1027, 366], [1027, 390], [1026, 391], [943, 391], [942, 390]], "type": "cell", "rows": [4, 4], "columns": [7, 7], "text_list": []}, {"location": [[852, 364], [937, 364], [937, 391], [852, 391]], "bbox": [852, 364, 937, 391], "points": [[853, 366], [854, 364], [856, 366], [857, 364], [934, 364], [935, 366], [936, 364], [937, 366], [937, 390], [936, 391], [854, 391], [852, 389], [853, 388]], "type": "cell", "rows": [4, 4], "columns": [6, 6], "text_list": []}, {"location": [[762, 364], [847, 
364], [847, 391], [762, 391]], "bbox": [762, 364, 847, 391], "points": [[762, 366], [763, 364], [764, 364], [766, 366], [767, 364], [846, 364], [847, 366], [847, 390], [846, 391], [763, 391], [762, 390], [763, 389], [763, 367]], "type": "cell", "rows": [4, 4], "columns": [5, 5], "text_list": []}, {"location": [[673, 364], [759, 364], [759, 391], [673, 391]], "bbox": [673, 364, 759, 391], "points": [[673, 366], [674, 364], [756, 364], [759, 367], [759, 389], [756, 391], [674, 391], [673, 390]], "type": "cell", "rows": [4, 4], "columns": [4, 4], "text_list": []}, {"location": [[383, 364], [569, 364], [569, 391], [383, 391]], "bbox": [383, 364, 569, 391], "points": [[383, 366], [384, 364], [567, 364], [569, 367], [569, 390], [568, 391], [384, 391], [383, 390]], "type": "cell", "rows": [4, 4], "columns": [2, 2], "text_list": []}, {"location": [[233, 364], [377, 364], [377, 422], [233, 422]], "bbox": [233, 364, 377, 422], "points": [[235, 366], [236, 364], [239, 364], [240, 366], [242, 364], [243, 366], [244, 364], [245, 366], [246, 364], [247, 366], [249, 364], [250, 366], [251, 364], [252, 366], [253, 364], [254, 366], [259, 366], [260, 364], [261, 366], [263, 364], [264, 366], [265, 364], [266, 366], [267, 364], [268, 366], [270, 364], [271, 366], [272, 364], [273, 366], [274, 364], [276, 366], [277, 364], [278, 366], [279, 364], [280, 366], [374, 366], [375, 364], [376, 364], [377, 366], [377, 421], [376, 422], [236, 422], [235, 421], [235, 396], [233, 395], [235, 394], [233, 392], [233, 390], [235, 389], [233, 388], [233, 383], [235, 382]], "type": "cell", "rows": [4, 5], "columns": [1, 1], "text_list": []}, {"location": [[1213, 334], [1298, 334], [1298, 360], [1213, 360]], "bbox": [1213, 334, 1298, 360], "points": [[1213, 335], [1215, 334], [1296, 334], [1298, 335], [1298, 359], [1296, 360], [1215, 360], [1213, 359]], "type": "cell", "rows": [3, 3], "columns": [10, 10], "text_list": []}, {"location": [[1123, 334], [1209, 334], [1209, 360], [1123, 360]], "bbox": 
[1123, 334, 1209, 360], "points": [[1123, 335], [1125, 334], [1208, 334], [1209, 335], [1209, 359], [1208, 360], [1125, 360], [1123, 359]], "type": "cell", "rows": [3, 3], "columns": [9, 9], "text_list": []}, {"location": [[1032, 334], [1118, 334], [1118, 360], [1032, 360]], "bbox": [1032, 334, 1118, 360], "points": [[1032, 335], [1033, 334], [1116, 334], [1118, 335], [1118, 359], [1116, 360], [1033, 360], [1032, 359], [1032, 355], [1033, 354], [1032, 353], [1033, 352], [1032, 350], [1033, 349], [1032, 348], [1033, 347], [1033, 345], [1032, 343], [1033, 342], [1032, 341]], "type": "cell", "rows": [3, 3], "columns": [8, 8], "text_list": []}, {"location": [[942, 334], [1027, 334], [1027, 360], [942, 360]], "bbox": [942, 334, 1027, 360], "points": [[942, 335], [943, 334], [1026, 334], [1027, 335], [1027, 359], [1026, 360], [943, 360], [942, 359]], "type": "cell", "rows": [3, 3], "columns": [7, 7], "text_list": []}, {"location": [[852, 334], [937, 334], [937, 360], [852, 360]], "bbox": [852, 334, 937, 360], "points": [[854, 334], [936, 334], [937, 335], [937, 359], [936, 360], [854, 360], [853, 359], [853, 337], [852, 336]], "type": "cell", "rows": [3, 3], "columns": [6, 6], "text_list": []}, {"location": [[762, 334], [847, 334], [847, 360], [762, 360]], "bbox": [762, 334, 847, 360], "points": [[762, 335], [763, 334], [846, 334], [847, 335], [847, 359], [846, 360], [764, 360], [763, 359], [763, 336]], "type": "cell", "rows": [3, 3], "columns": [5, 5], "text_list": []}, {"location": [[673, 334], [759, 334], [759, 360], [673, 360]], "bbox": [673, 334, 759, 360], "points": [[673, 335], [674, 334], [756, 334], [759, 336], [759, 359], [757, 360], [674, 360], [673, 359]], "type": "cell", "rows": [3, 3], "columns": [4, 4], "text_list": []}, {"location": [[383, 334], [569, 334], [569, 360], [383, 360]], "bbox": [383, 334, 569, 360], "points": [[383, 335], [384, 334], [568, 334], [569, 335], [569, 359], [568, 360], [384, 360], [383, 359]], "type": "cell", "rows": [3, 3], 
"columns": [2, 2], "text_list": []}, {"location": [[233, 334], [377, 334], [377, 360], [233, 360]], "bbox": [233, 334, 377, 360], "points": [[233, 335], [235, 334], [376, 334], [377, 335], [377, 359], [376, 360], [236, 360], [235, 359], [235, 356], [233, 355], [233, 339], [235, 337], [233, 336]], "type": "cell", "rows": [3, 3], "columns": [1, 1], "text_list": []}, {"location": [[1213, 302], [1298, 302], [1298, 329], [1213, 329]], "bbox": [1213, 302, 1298, 329], "points": [[1213, 304], [1215, 302], [1296, 302], [1298, 304], [1298, 328], [1296, 329], [1215, 329], [1213, 328]], "type": "cell", "rows": [2, 2], "columns": [10, 10], "text_list": []}, {"location": [[1123, 302], [1209, 302], [1209, 329], [1123, 329]], "bbox": [1123, 302, 1209, 329], "points": [[1123, 304], [1125, 302], [1206, 302], [1209, 305], [1209, 328], [1208, 329], [1125, 329], [1123, 328]], "type": "cell", "rows": [2, 2], "columns": [9, 9], "text_list": []}, {"location": [[1032, 302], [1118, 302], [1118, 329], [1032, 329]], "bbox": [1032, 302, 1118, 329], "points": [[1032, 304], [1033, 302], [1116, 302], [1118, 304], [1118, 328], [1116, 329], [1035, 329], [1032, 327], [1032, 325], [1033, 323], [1032, 322], [1033, 321], [1033, 314], [1032, 313], [1033, 312], [1032, 311], [1033, 309], [1032, 308]], "type": "cell", "rows": [2, 2], "columns": [8, 8], "text_list": []}, {"location": [[942, 302], [1027, 302], [1027, 329], [942, 329]], "bbox": [942, 302, 1027, 329], "points": [[942, 304], [943, 302], [1026, 302], [1027, 304], [1027, 328], [1026, 329], [944, 329], [942, 327]], "type": "cell", "rows": [2, 2], "columns": [7, 7], "text_list": []}, {"location": [[852, 302], [937, 302], [937, 329], [852, 329]], "bbox": [852, 302, 937, 329], "points": [[852, 304], [853, 302], [936, 302], [937, 304], [937, 328], [936, 329], [854, 329], [853, 328], [853, 305]], "type": "cell", "rows": [2, 2], "columns": [6, 6], "text_list": []}, {"location": [[762, 302], [847, 302], [847, 329], [762, 329]], "bbox": [762, 302, 847, 
329], "points": [[762, 304], [763, 302], [846, 302], [847, 304], [847, 328], [846, 329], [764, 329], [763, 328], [763, 305]], "type": "cell", "rows": [2, 2], "columns": [5, 5], "text_list": []}, {"location": [[673, 302], [759, 302], [759, 329], [673, 329]], "bbox": [673, 302, 759, 329], "points": [[673, 304], [674, 302], [756, 302], [759, 305], [759, 328], [757, 329], [674, 329], [673, 328]], "type": "cell", "rows": [2, 2], "columns": [4, 4], "text_list": []}, {"location": [[383, 302], [569, 302], [569, 329], [383, 329]], "bbox": [383, 302, 569, 329], "points": [[383, 304], [384, 302], [568, 302], [569, 304], [569, 328], [568, 329], [384, 329], [383, 328]], "type": "cell", "rows": [2, 2], "columns": [2, 2], "text_list": []}, {"location": [[1213, 273], [1298, 273], [1298, 298], [1213, 298]], "bbox": [1213, 273, 1298, 298], "points": [[1213, 274], [1215, 273], [1296, 273], [1298, 274], [1298, 297], [1296, 298], [1215, 298], [1213, 297]], "type": "cell", "rows": [1, 1], "columns": [10, 10], "text_list": []}, {"location": [[1123, 273], [1209, 273], [1209, 298], [1123, 298]], "bbox": [1123, 273, 1209, 298], "points": [[1123, 274], [1125, 273], [1208, 273], [1209, 274], [1209, 297], [1208, 298], [1125, 298], [1123, 297]], "type": "cell", "rows": [1, 1], "columns": [9, 9], "text_list": []}, {"location": [[1032, 273], [1118, 273], [1118, 298], [1032, 298]], "bbox": [1032, 273, 1118, 298], "points": [[1032, 274], [1033, 273], [1116, 273], [1118, 274], [1118, 297], [1116, 298], [1035, 298], [1032, 295]], "type": "cell", "rows": [1, 1], "columns": [8, 8], "text_list": []}, {"location": [[942, 273], [1027, 273], [1027, 298], [942, 298]], "bbox": [942, 273, 1027, 298], "points": [[942, 274], [943, 273], [1026, 273], [1027, 274], [1027, 297], [1026, 298], [943, 298], [942, 297]], "type": "cell", "rows": [1, 1], "columns": [7, 7], "text_list": []}, {"location": [[852, 273], [937, 273], [937, 298], [852, 298]], "bbox": [852, 273, 937, 298], "points": [[852, 274], [853, 273], [936, 
273], [937, 274], [937, 297], [936, 298], [854, 298], [853, 297], [853, 281], [852, 280]], "type": "cell", "rows": [1, 1], "columns": [6, 6], "text_list": []}, {"location": [[762, 273], [847, 273], [847, 298], [762, 298]], "bbox": [762, 273, 847, 298], "points": [[762, 274], [763, 273], [846, 273], [847, 274], [847, 297], [846, 298], [763, 298], [762, 297], [763, 295], [763, 277], [762, 276]], "type": "cell", "rows": [1, 1], "columns": [5, 5], "text_list": []}, {"location": [[673, 273], [759, 273], [759, 298], [673, 298]], "bbox": [673, 273, 759, 298], "points": [[673, 274], [674, 273], [757, 273], [759, 274], [759, 295], [756, 298], [674, 298], [673, 297]], "type": "cell", "rows": [1, 1], "columns": [4, 4], "text_list": []}, {"location": [[573, 273], [668, 273], [668, 576], [573, 576]], "bbox": [573, 273, 668, 576], "points": [[574, 274], [575, 273], [667, 273], [668, 274], [668, 575], [667, 576], [574, 576], [573, 575], [574, 574], [574, 457], [575, 456], [575, 453], [574, 452]], "type": "cell", "rows": [1, 10], "columns": [3, 3], "text_list": []}, {"location": [[383, 273], [569, 273], [569, 298], [383, 298]], "bbox": [383, 273, 569, 298], "points": [[383, 274], [384, 273], [568, 273], [569, 274], [569, 297], [568, 298], [384, 298], [383, 297]], "type": "cell", "rows": [1, 1], "columns": [2, 2], "text_list": []}, {"location": [[233, 273], [377, 273], [377, 329], [233, 329]], "bbox": [233, 273, 377, 329], "points": [[233, 274], [235, 273], [376, 273], [377, 274], [377, 328], [376, 329], [236, 329], [235, 328], [235, 305], [233, 304], [235, 302], [233, 301], [233, 294], [235, 293], [233, 292], [235, 291], [233, 290], [235, 288], [235, 276]], "type": "cell", "rows": [1, 2], "columns": [1, 1], "text_list": []}, {"location": [[166, 272], [231, 272], [231, 576], [166, 576]], "bbox": [166, 272, 231, 576], "points": [[166, 273], [167, 272], [168, 273], [230, 273], [231, 274], [231, 278], [230, 279], [230, 329], [231, 330], [230, 332], [230, 339], [231, 340], [231, 353], 
[230, 354], [230, 360], [231, 361], [230, 362], [230, 385], [231, 387], [231, 392], [230, 394], [230, 430], [231, 431], [231, 459], [230, 460], [230, 464], [231, 465], [231, 467], [230, 468], [230, 484], [231, 485], [231, 486], [230, 487], [230, 488], [231, 490], [230, 491], [231, 492], [231, 508], [230, 509], [230, 526], [231, 527], [231, 534], [230, 535], [230, 571], [231, 573], [231, 575], [230, 576], [167, 576], [166, 575]], "type": "cell", "rows": [1, 10], "columns": [0, 0], "text_list": []}, {"location": [[1212, 240], [1298, 240], [1298, 267], [1212, 267]], "bbox": [1212, 240, 1298, 267], "points": [[1215, 240], [1216, 242], [1217, 240], [1296, 240], [1298, 242], [1298, 266], [1296, 267], [1215, 267], [1212, 265], [1213, 264], [1212, 263], [1212, 243]], "type": "cell", "rows": [0, 0], "columns": [10, 10], "text_list": []}, {"location": [[1123, 240], [1209, 240], [1209, 267], [1123, 267]], "bbox": [1123, 240, 1209, 267], "points": [[1123, 242], [1125, 240], [1126, 242], [1127, 242], [1128, 240], [1170, 240], [1171, 242], [1173, 240], [1183, 240], [1184, 242], [1185, 240], [1190, 240], [1191, 242], [1192, 240], [1208, 240], [1209, 242], [1209, 266], [1208, 267], [1125, 267], [1123, 266]], "type": "cell", "rows": [0, 0], "columns": [9, 9], "text_list": []}, {"location": [[1032, 240], [1118, 240], [1118, 267], [1032, 267]], "bbox": [1032, 240, 1118, 267], "points": [[1035, 240], [1036, 242], [1037, 240], [1038, 242], [1039, 240], [1116, 240], [1118, 242], [1118, 266], [1116, 267], [1035, 267], [1032, 265], [1032, 243]], "type": "cell", "rows": [0, 0], "columns": [8, 8], "text_list": []}, {"location": [[942, 240], [1027, 240], [1027, 267], [942, 267]], "bbox": [942, 240, 1027, 267], "points": [[942, 242], [943, 240], [944, 240], [946, 242], [947, 242], [948, 240], [1026, 240], [1027, 242], [1027, 266], [1026, 267], [943, 267], [942, 266]], "type": "cell", "rows": [0, 0], "columns": [7, 7], "text_list": []}, {"location": [[852, 240], [937, 240], [937, 267], [852, 
267]], "bbox": [852, 240, 937, 267], "points": [[854, 240], [856, 242], [857, 240], [858, 242], [859, 240], [860, 242], [861, 240], [873, 240], [874, 242], [875, 240], [877, 242], [880, 242], [881, 240], [885, 240], [886, 242], [887, 240], [892, 240], [893, 242], [894, 240], [897, 240], [898, 242], [899, 240], [936, 240], [937, 242], [937, 266], [936, 267], [854, 267], [852, 265], [852, 243]], "type": "cell", "rows": [0, 0], "columns": [6, 6], "text_list": []}, {"location": [[762, 240], [847, 240], [847, 267], [762, 267]], "bbox": [762, 240, 847, 267], "points": [[762, 242], [763, 240], [764, 240], [766, 242], [767, 242], [768, 240], [769, 242], [770, 242], [771, 240], [781, 240], [782, 242], [785, 242], [787, 240], [788, 242], [790, 242], [791, 240], [795, 240], [796, 242], [797, 240], [803, 240], [804, 242], [805, 240], [846, 240], [847, 242], [847, 266], [846, 267], [763, 267], [762, 266]], "type": "cell", "rows": [0, 0], "columns": [5, 5], "text_list": []}, {"location": [[673, 240], [759, 240], [759, 267], [673, 267]], "bbox": [673, 240, 759, 267], "points": [[673, 242], [674, 240], [675, 242], [678, 242], [679, 240], [680, 242], [681, 240], [693, 240], [694, 242], [695, 240], [697, 242], [700, 242], [701, 240], [705, 240], [706, 242], [707, 240], [712, 240], [713, 242], [714, 240], [756, 240], [759, 243], [759, 265], [756, 267], [674, 267], [673, 266]], "type": "cell", "rows": [0, 0], "columns": [4, 4], "text_list": []}, {"location": [[573, 240], [668, 240], [668, 267], [573, 267]], "bbox": [573, 240, 668, 267], "points": [[601, 242], [602, 240], [630, 240], [631, 242], [632, 242], [633, 240], [637, 240], [638, 242], [667, 242], [668, 243], [668, 266], [667, 267], [574, 267], [573, 266], [573, 243], [574, 242]], "type": "cell", "rows": [0, 0], "columns": [3, 3], "text_list": []}, {"location": [[166, 240], [569, 240], [569, 267], [166, 267]], "bbox": [166, 240, 569, 267], "points": [[166, 242], [167, 240], [168, 242], [568, 242], [569, 243], [569, 266], [568, 
267], [167, 267], [166, 266]], "type": "cell", "rows": [0, 0], "columns": [0, 2], "text_list": []}, {"location": [[161, 236], [1303, 236], [1303, 581], [161, 581]], "bbox": [161, 236, 1303, 581], "points": [[162, 236], [162, 238], [161, 239], [161, 580], [162, 581], [1302, 581], [1303, 580], [1303, 237], [166, 237], [164, 236]], "type": "table", "contains": [85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182]}]}}]


================================================
FILE: libs/kotaemon/tests/resources/html/dummy.html
================================================
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta http-equiv="Content-Style-Type" content="text/css" /><title></title></head><body style="text-align:justify; line-height:18pt; widows:0; orphans:0; font-family:'ＭＳ 明朝'; font-size:10.5pt"><div><div style="-aw-headerfooter-type:header-primary; clear:both"><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">細則　本社編（情報システム部）　　　　　　　　　　　</span><span style="width:37.85pt; font-family:'ＭＳ ゴシック'; font-size:10pt; display:inline-block">&#xa0;</span><span style="width:42.55pt; font-family:'ＭＳ ゴシック'; font-size:10pt; display:inline-block">&#xa0;</span><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">　分類番号　１５７３００</span></p><p style="margin-top:0pt; margin-right:5.65pt; margin-bottom:0pt; text-align:right"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">２０２０．２</span></p><p style="margin-top:0pt; margin-bottom:0pt; line-height:150%; font-size:9pt"><span style="font-family:'Times New Roman'; -aw-import:ignore">&#xa0;</span></p></div><p style="margin-top:0pt; margin-bottom:0pt"></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="height:0pt; text-align:left; display:block; position:absolute; z-index:0"><img src="dummy_image.png" width="605" height="50" alt="" style="margin-top:8.45pt; margin-left:1.55pt; -aw-left-pos:2.05pt; -aw-rel-hpos:column; -aw-rel-vpos:paragraph; -aw-top-pos:8.95pt; -aw-wrap-type:none; position:absolute" /></span><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-left:21pt; margin-bottom:0pt; text-indent:-21pt; -aw-import:list-item; -aw-list-level-number:0; -aw-list-number-format:'%0．'; -aw-list-number-styles:'decimalFullWidth'; 
-aw-list-number-values:'1'; -aw-list-padding-sml:1pt"><span style="-aw-import:ignore"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">１．</span><span style="width:1pt; font:7pt 'Times New Roman'; display:inline-block"> </span></span><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">スパットくん紛失・盗難時の取扱</span></p><p style="margin-top:0pt; margin-left:16.9pt; margin-bottom:2.4pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">スパットくんの紛失・盗難の際は、速やかに停止依頼処理を入力するとともに、報告書を起票します。</span></p><table cellspacing="0" cellpadding="0" style="margin-left:16.8pt; border:0.75pt solid #000000; -aw-border:0.5pt single; border-collapse:collapse"><tr style="height:19.8pt"><td style="width:31.05pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-right:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">ＮＯ</span></p></td><td style="width:80.6pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">項目</span></p></td><td style="width:303.6pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; 
text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">内容</span></p></td></tr><tr style="height:85.5pt; page-break-inside:avoid"><td style="width:31.05pt; border-top-style:solid; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">１</span></p></td><td style="width:80.6pt; border-style:solid solid dotted; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">対象のスパット</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">くん確認</span></p></td><td style="width:303.6pt; border-top-style:solid; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt single"><p style="margin-top:0pt; margin-bottom:2.4pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">紛失・盗難に気づいた時には、対象のスパットくんの端末識別番号を確認します。</span></p><p style="margin-top:2.4pt; margin-left:11pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt"><span style="font-family:'ＭＳ ゴシック'; 
font-size:10pt">※盗難の場合は警察への届出も必要です。</span></p><p style="margin-top:0pt; margin-left:11pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">※紛失・盗難の場合は盗難・紛失事故報告も必要です。</span></p><p style="margin-top:0pt; margin-left:23.75pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">あいリクエスト（総務室（大阪））</span></p><p style="margin-top:0pt; margin-left:23.75pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:16pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">『盗難・紛失事故兼個人情報等事故報告』</span></p></td></tr><tr style="height:35.4pt; page-break-inside:avoid"><td style="width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">２</span></p></td><td style="width:80.6pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">報告書の起票</span></p></td><td style="width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p 
style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">あいリクエスト（システム業務室）-『モバイル決済端末（スパットくん）紛失・盗難報告書』を起票します。</span></p></td></tr><tr style="height:62.2pt; page-break-inside:avoid"><td style="width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">３</span></p></td><td style="width:80.6pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">停止依頼入力</span></p></td><td style="width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">モバイル端末管理ウェブより停止依頼処理を入力します。</span></p><p style="margin-top:0pt; margin-left:11.55pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">※停止依頼入力により該当スパットくんは使用不可となります。</span></p></td></tr><tr style="height:45.75pt; 
page-break-inside:avoid"><td style="width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">４</span></p></td><td style="width:80.6pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">報告書の承認と</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">担当室への報告</span></p></td><td style="width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">所属長はあいリクエストにて申請・送付された報告書を確認・承認します。承認後、報告書をあいリクエスト（総務室（大阪））－『紛失・盗難事故　兼　個人情報等事故報告』に添付し報告します。（関連細則：</span><a href="https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00044505" style="text-decoration:none"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff">本支社編11110／210060「紛失・盗難事故等の被害報告」</span></a><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">を参照）</span></p><p style="margin-top:0pt; margin-bottom:0pt; 
text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">※日計処理の実施が「無」の場合本社担当室（システム業務室、</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">契約審査室、収納サービス室、損保サービス室）へ連絡する。</span></p></td></tr></table><p style="margin-top:0pt; margin-bottom:0pt; text-indent:136.5pt; line-height:17pt"><a href="https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00028928" style="text-decoration:none"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff">【関連マニュアル】　スパットくん・モバイル管理ウェブ操作マニュアル</span></a></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:130pt; line-height:17pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:10pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">＜スパットくん再発見時の対応＞</span></p><table cellspacing="0" cellpadding="0" style="margin-left:16.8pt; border:0.75pt solid #000000; -aw-border:0.5pt single; border-collapse:collapse"><tr style="height:19.8pt"><td style="width:31.05pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-right:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">ＮＯ</span></p></td><td style="width:80.6pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single; 
-aw-border-right:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">項目</span></p></td><td style="width:303.6pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">内容</span></p></td></tr><tr style="height:49.7pt; page-break-inside:avoid"><td style="width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">１</span></p></td><td style="width:80.6pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">端末設置組織の</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">確認</span></p></td><td style="width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; 
padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">スパットくんの設置状況照会を行い、紛失・盗難となったスパットくんであるか確認します。</span></p></td></tr><tr style="height:56.75pt; page-break-inside:avoid"><td style="width:31.05pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">２</span></p></td><td style="width:80.6pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">端末停止解除</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">入力</span></p></td><td style="width:303.6pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">モバイル端末管理ウェブより停止解除処理を入力します。</span></p><p style="margin-top:0pt; margin-left:20.7pt; 
margin-bottom:0pt; text-indent:-10.1pt; text-align:justify; line-height:16pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">※スパットくんの利用再開は、支社にて停止解除入力から２営業日以上経過してから利用下さい。</span></p></td></tr></table><p style="margin-top:0pt; margin-bottom:0pt; text-indent:9.75pt; text-align:center"><span>　　　　　　　　</span><a href="https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00028928" style="text-decoration:none"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff">【関連マニュアル】　スパットくん・モバイル管理ウェブ操作マニュアル</span></a></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><p style="margin-top:0pt; margin-left:21pt; margin-bottom:0pt; text-indent:-21pt; -aw-import:list-item; -aw-list-level-number:0; -aw-list-number-format:'%0．'; -aw-list-number-styles:'decimalFullWidth'; -aw-list-number-values:'2'; -aw-list-padding-sml:1pt"><span style="-aw-import:ignore"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">２．</span><span style="width:1pt; font:7pt 'Times New Roman'; display:inline-block"> </span></span><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">スパットくん故障時の取扱</span></p><p style="margin-top:0pt; margin-left:15.95pt; margin-bottom:2.4pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">スパットくんが故障した場合には、代替機への交換と故障機の返却を行います。</span></p><table cellspacing="0" cellpadding="0" style="margin-left:16.8pt; border:0.75pt solid #000000; -aw-border:0.5pt single; border-collapse:collapse"><tr style="height:19.8pt"><td style="width:30.35pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; 
-aw-border-bottom:0.5pt single; -aw-border-right:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">ＮＯ</span></p></td><td style="width:78.25pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">項目</span></p></td><td style="width:317.35pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:solid; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; background-color:#e0e0e0; -aw-border-bottom:0.5pt single; -aw-border-left:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">内容</span></p></td></tr><tr style="height:55.55pt"><td style="width:30.35pt; border-top-style:solid; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">１</span></p></td><td style="width:78.25pt; border-style:solid solid dotted; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; 
-aw-border-top:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">故障内容の確認と報告書の起票</span></p></td><td style="width:317.35pt; border-top-style:solid; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt single"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">スパットくんが故障した場合は、あいリクエスト（システム業務室）-『モバイル決済端末（スパットくん）故障報告書』を起票します。</span></p></td></tr><tr style="height:106pt; page-break-inside:avoid"><td style="width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">２</span></p></td><td style="width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">修理依頼入力</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'Times New Roman'; font-size:10pt; font-weight:bold; -aw-import:ignore">&#xa0;</span></p></td><td 
style="width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.2pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">『モバイル端末管理ウェブ』より修理依頼処理を入力します。修理</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.2pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">依頼処理の入力により、代替機が送付されます。</span><br /><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">その際、「モバイル決済端末修理依頼書」を印刷し、「モバイル決済端末（スパットくん）故障報告書」を参照して必要事項を記入します。</span></p></td></tr><tr style="height:76.5pt; page-break-inside:avoid"><td style="width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">３</span></p></td><td style="width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">報告書の承認と担当室への報告</span></p></td><td style="width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; 
border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-left:0.2pt; margin-bottom:0pt; text-indent:-0.2pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">所属長はあいリクエストにて申請・送付された報告書を確認・承認し、システム業務室に報告します。</span></p></td></tr><tr style="height:86.2pt; page-break-inside:avoid"><td style="width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">４</span></p></td><td style="width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">梱包</span></p></td><td style="width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">以下をセットで梱包します。</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:10.3pt; text-align:justify; 
line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">・故障したスパットくん本体</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:10.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">・付属品（充電アダプタ、コード、タッチペン、ストラップ）</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:10.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">・モバイル決済端末修理依頼書</span></p><p style="margin-top:0pt; margin-left:4pt; margin-bottom:0pt; text-indent:10.1pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">※各所属にて保管のスパットくん送付箱に梱包します</span></p><p style="margin-top:0pt; margin-left:4pt; margin-bottom:0pt; text-indent:10.1pt; text-align:justify; line-height:18pt"><span style="font-family:'Times New Roman'; font-size:10pt; -aw-import:ignore">&#xa0;</span></p></td></tr><tr style="height:86.2pt; page-break-inside:avoid"><td style="width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">５</span></p></td><td style="width:78.25pt; border-style:dotted solid; border-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">代替機の受取と</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span 
style="font-family:'ＭＳ ゴシック'; font-size:10pt">故障機の送付</span></p><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'Times New Roman'; font-size:10pt; font-weight:bold; -aw-import:ignore">&#xa0;</span></p></td><td style="width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; border-bottom-style:dotted; border-bottom-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-bottom:0.5pt dot; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-left:0.1pt; margin-bottom:0pt; text-indent:-0.1pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">入力後数日でスパットくんの代替機が到着します。</span></p><p style="margin-top:0pt; margin-left:0.1pt; margin-bottom:0pt; text-indent:-0.1pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">受け取ると同時に、配送業者（日本通運）に故障したスパットくん、および「モバイル決済端末修理依頼書」を渡します。</span></p><p style="margin-top:0pt; margin-left:0.1pt; margin-bottom:2.4pt; text-indent:10pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">※「モバイル決済端末修理依頼書」を忘れずに同梱下さい。</span></p><p style="margin-top:2.4pt; margin-left:14.8pt; margin-bottom:0pt; text-indent:-14.8pt; text-align:justify; line-height:18pt"><span style="font-family:'Times New Roman'; font-size:10pt; -aw-import:ignore">&#xa0;</span></p></td></tr><tr style="height:52.15pt; page-break-inside:avoid"><td style="width:30.35pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:center; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">６</span></p></td><td 
style="width:78.25pt; border-top-style:dotted; border-top-width:0.75pt; border-right-style:solid; border-right-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-right:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:0pt; margin-bottom:0pt; text-indent:0.3pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">代替機の端末受取入力</span></p></td><td style="width:317.35pt; border-top-style:dotted; border-top-width:0.75pt; border-left-style:solid; border-left-width:0.75pt; padding-right:4.58pt; padding-left:4.58pt; vertical-align:top; -aw-border-left:0.5pt single; -aw-border-top:0.5pt dot"><p style="margin-top:2.4pt; margin-left:0.1pt; margin-bottom:0pt; text-indent:-0.1pt; text-align:justify; line-height:18pt"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt">モバイル端末管理ウェブより代替機の端末受取処理を入力します。</span></p></td></tr></table><p style="margin-top:0pt; margin-bottom:0pt; text-indent:9.75pt; text-align:center"><span>　　　　　　　　　　</span><a href="https://a3.itr.sumitomolife.co.jp/sso/dfw/kn/zc/open/zcko0102.do?DOCID=H00028928" style="text-decoration:none"><span style="font-family:'ＭＳ ゴシック'; font-size:10pt; text-decoration:underline; color:#0000ff">【関連マニュアル】　スパットくん・モバイル管理ウェブ操作マニュアル</span></a></p><p style="margin-top:0pt; margin-bottom:0pt"><span style="font-family:'ＭＳ ゴシック'; -aw-import:ignore">&#xa0;</span></p><div style="-aw-headerfooter-type:footer-primary; clear:both"><p style="margin-top:0pt; margin-bottom:0pt; text-align:center"><span>- </span><span style="-aw-field-start:true"></span><span style="-aw-field-code:' PAGE '"></span><span style="-aw-field-separator:true"></span><span>1</span><span style="-aw-field-end:true"></span><span> -</span></p></div></div></body></html>


================================================
FILE: libs/kotaemon/tests/resources/policy.md
================================================
# 5 年ごと配当付特定状態保障定期保険特約条項 目次

## 1. この特約の概要

第 1 条 特約保険金の支払

第 2 条 特約保険金の支払に関する補則

第 3 条 特約保険金の免責事由に該当した場合の取扱

第 4 条 特約保険金の請求、支払時期および支払場所

第 5 条 特約の保険料払込の免除

第 6 条 特約の締結

第 7 条 特約の責任開始期

第 8 条 特約の保険期間および保険料払込期間

第 9 条 特約の保険料の払込

第 10 条 猶予期間中の保険事故と保険料の取扱

第 11 条 特約の失効

第 12 条 特約の復活

第 13 条 告知義務

第 14 条 告知義務違反による解除

第 15 条 特約を解除できない場合

第 16 条 重大事由による解除

第 17 条 特約の解約

第 18 条 特約の返還金

第 19 条 特約の消滅とみなす場合

第 20 条 債権者等により特約が解約される場合の取扱

第 21 条 特約保険金額の減額

第 22 条 特約の更新

第 23 条 特約の契約者配当金

第 24 条 主契約の内容変更に伴う特約の取扱

第 25 条 主契約について保険料の自動貸付の規定を適用 する場合の取扱

第 26 条 主契約を払済保険に変更する場合の取扱

第 27 条 法令等の改正等に伴う特約障害保険金および特 約介護保険金の支払事由に関する規定の変更

第 28 条 管轄裁判所

第 29 条 契約内容の登録

第 30 条 主約款の規定の準用

第 31 条 5 年ごと配当付定期保険または 5 年ごと利差配 当付定期保険に付加した場合の特則

第 32 条 5 年ごと配当付生存給付金付定期保険または 5 年ごと利差配当付生存給付金付定期保険に付加 した場合の特則
第 33 条 5 年ごと配当付逓増定期保険または 5 年ごと利 差配当付逓増定期保険に付加した場合の特則

第 34 条 5 年ごと配当付養老保険または 5 年ごと利差配 当付養老保険に付加した場合の特則

第 35 条 5 年ごと配当付終身保険に 5 年ごと配当付年金 支払移行特約等を付加した場合または 5 年ごと 利差配当付終身保険に 5 年ごと利差配当付年金 支払移行特約等を付加した場合の特約の取扱

第 36 条 保険料払込期間が終身の 5 年ごと配当付終身保 険または保険料払込期間が終身の 5 年ごと利差 配当付終身保険に付加した場合の特則

第 37 条 5 年ごと配当付更新型終身移行保険または 5 年 ごと利差配当付更新型終身移行保険に付加した 場合の特則

第 38 条 5 年ごと配当付更新型終身移行保険または 5 年 ごと利差配当付更新型終身移行保険に 5 年ごと 配当付年金支払移行特約等を付加した場合の特 約の取扱

第 39 条 5 年ごと配当付介護年金終身保障保険または 5 年ごと利差配当付介護年金終身保障保険に付加 した場合の特則

第 40 条 5 年ごと配当付終身医療保険または 5 年ごと利 差配当付終身医療保険に付加した場合の特則

第 41 条 5 年ごと配当付介護年金保険（解約返還金なし 型）に付加した場合の特則

第 42 条 転換後契約または変更後契約に付加した場合の 特則

第 43 条 転換特約、部分保障変更特約または家族内保障 承継特約を付加した場合の特則

第 44 条 特別条件を付けた場合の特則

第 45 条 契約日が平成 22 年 3 月 1 日以前の主契約に付加 した場合の特則

第 46 条 契約日が平成 24 年 10 月 1 日以前の主契約に付加 した場合の特約特定疾病保険金、特約障害保険 金および特約介護保険金の代理請求

## 2. 5 年ごと配当付特定状態保障定期保険特約条項

## 3. (この特約の概要)

(2015 年 5 月 21 日改正)

この特約は、つぎの給付を行うことを主な内容とするものです。なお、特約死亡保険金額、特約特定疾病保険金額、特 約障害保険金額および特約介護保険金額は同額です。

|                    |                                                                                         給付の内容                                                                                          |
| :----------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|   特約死亡保険金   |                                                                 被保険者がこの特約の保険期間中に死亡したときに支払います。                                                                  |
| 特約特定疾病保険金 | $\begin{array}{l}\text { 被保険者がこの特約の保険期間中に特定の疾病（悪性新生物（がん）、急性心筋梗塞または脳 } \\ \text { 卒中）により所定の状態に該当したときに支払います。 }\end{array}$ |
|   特約障害保険金   |                               ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-1.jpg?height=93&width=1317&top_left_y=2514&top_left_x=537)                               |
|   特約介護保険金   |                                                     被保険者がこの特約の保険期間中に傷害または疾病により所定の要介護状態に該当したとき                                                      |

1. この特約において支払う特約保険金はつぎのとおりです。

|                                                                                                                                                           |           $\begin{array}{l}\text { 特約保険金を支払う場合（以下「支払事由」 } \\ \text { といいます。） }\end{array}$           |                                                          支払額                                                           |                                                                                                          受取人                                                                                                           |                                                                                                                                            $\begin{array}{l}\text { 支払事由に該当しても特約保険金を支払 } \\ \text { わない場合 (以下「免責事由」といいます。) }\end{array}$                                                                                                                                            |
| :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|      $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 死 } \\ 亡 \\ \text { 亡 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 }\end{array}$      |                $\begin{array}{l}\text { 被保険者がこの特約の保険期間中に死亡し } \\ \text { たとき }\end{array}$                | $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 } \\ \text { 額 }\end{array}$ |                                                 $\begin{array}{l}\text { 特 } \\ \text { 絢 } \\ \text { 㨐 } \\ \text { 険 } \\ \text { 善 } \\ \text { 聚 }\end{array}$                                                 | $\begin{array}{l}\text { つぎのいずれかにより左記の支払事由が } \\ \text { 生じたとき } \\ \text { (1) この特約の責任開始期（復活の取扱が } \\ \text { 行われた後は、最後の復活の際の責任開 } \\ \text { 始期。以下同じ。）の属するもからその } \\ \text { 日を含めて } 3 \text { 年以内の自殺 } \\ \text { (2) 保険契約者または特約死亡保険金受 } \\ \text { 取人の故意 } \\ \text { (3) 戦争その他の变乱 }\end{array}$ |
| $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 特 } \\ \text { 定 } \\ \text { 疾 } \\ \text { 病 } \\ \text { 除 } \\ \text { 金 }\end{array}$ | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-2.jpg?height=1118&width=621&top_left_y=724&top_left_x=305) |                                                                                                                           | $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 特 } \\ \text { 定 } \\ \text { 疾 } \\ \text { 病 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 } \\ \text { 受 } \\ \text { 取 } \\ \text { 人 }\end{array}$ |                                                                                                                                                                                                            +                                                                                                                                                                                                             |
|         $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 障 } \\ \text { 害 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 }\end{array}$         | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-2.jpg?height=900&width=621&top_left_y=1837&top_left_x=305) |                                                                                                                           |                 $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 障 } \\ \text { 害 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 } \\ \text { 受 } \\ \text { 取 } \\ \text { 人 }\end{array}$                 |                                                                                                                                             ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-2.jpg?height=904&width=594&top_left_y=1837&top_left_x=1262)                                                                                                                                             |

|                                                                                                                                           |                                                                                                支払事由                                                                                                |                                                          支払額                                                           |                                                                                          受取人                                                                                           |                                                                                                                                                                                                                                                                                                                                           免責事由                                                                                                                                                                                                                                                                                                                                            |
| :---------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 介 } \\ \text { 護 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 }\end{array}$ | $\begin{array}{l}\text { 被保険者がこの特約の責任開始期以後の傷 } \\ \text { 害または疾病を原因として、この特約の保 } \\ \text { 険期間中に要介護状態（表4）に該当した } \\ \text { とき }\end{array}$ | $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 } \\ \text { 額 }\end{array}$ | $\begin{array}{l}\text { 特 } \\ \text { 約 } \\ \text { 介 } \\ \text { 護 } \\ \text { 保 } \\ \text { 険 } \\ \text { 金 } \\ \text { 受 } \\ \text { 取 } \\ \text { 人 }\end{array}$ | $\begin{array}{l}\text { つぎのいずれかにより左記の支払事由が生 } \\ \text { じたとき } \\ \text { (1) 保険契約者または被保険者の故意また } \\ \text { は重大な過失 } \\ \text { (2) 被保険者の犯罪行為 } \\ \text { (3) 被保険者の精神障害を原因とする事故 } \\ \text { (4) 被保険者の泥酔の状態を原因とする事 } \\ \text { 故 } \\ \text { (5) 被保険者が法令に定める運転資格を持 } \\ \text { たないで運転している間に生じた事故 } \\ \text { (6) 被保険者が法令に定める酒気帯び運転 } \\ \text { またはこれに相当する運転をしている間 } \\ \text { に生じた事故 } \\ \text { (7) 被保険者の薬物依存 } \\ \text { (8) 地震、噴火または津波 } \\ \text { (9) 戦争その他の变乱 }\end{array}$ |

2. 第 1 項の特約特定疾病保険金の支払事由の(1)に該当した場合でも、この特約の責任開始期の属する日からその日を含 めて 90 日以内に乳房の悪性新生物（表 1 中、基本分類コード C 50 の悪性新生物。以下同じ。）に罹患し、医師により診断 確定されたときは、当会社は、特約特定疾病保険金を支払いません。ただし、その後（乳房の悪性新生物についてはこ の特約の責任開始期の属する日からその日を含めて 90 日経過後)、この特約の保険期間中に、被保険者がその乳房の悪性 新生物と因果関係のない悪性新生物（表 1）に罹患し、医師により診断確定されたときは、特約特定疾病保険金を支払 います。

## 4. 表 1 対象となる悪性新生物、急性心筋梗塞、脳卒中

対象となる悪性新生物、急性心筋梗塞、脳卒中とは、次表によって定義づけられる疾病とし、かつ、平成 21 年 3 月 23 日 総務省告示第 176 号にもとづく厚生労働省大臣官房統計情報部編「疾病、傷害および死因統計分類提要ＩＣＤ－10（2003 年版）準拠」に記載された分類項目中、次表の基本分類コードに規定される内容によるものをいいます。

|  疾 病 名  |                                                         疾 病 の 定 義                                                          |                                                           分 類 項 目                                                           |                                                                                            ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-3.jpg?height=85&width=150&top_left_y=1540&top_left_x=1689)                                                                                             |
| :--------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| 悪性新生物 | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-3.jpg?height=732&width=445&top_left_y=1617&top_left_x=442) | ![](https://cdn.mathpix.com/cropped/2023_09_15_60b7a05a11e6ef69c8bbg-3.jpg?height=732&width=750&top_left_y=1617&top_left_x=909) | $\begin{array}{l}\mathrm{C} 00-\mathrm{C} 1 \\ \mathrm{C} 00 \\ \mathrm{C} 01 \\ \mathrm{C} 02 \\ \mathrm{C} 03 \\ \mathrm{C} 04 \\ \mathrm{C} 05 \\ \mathrm{C} 06 \\ \mathrm{C} 07 \\ \mathrm{C} 08 \\ \mathrm{C} 09 \\ \mathrm{C} 10 \\ \mathrm{C} 11 \\ \mathrm{C} 12 \\ \mathrm{C} 13 \\ \mathrm{C} 14\end{array}$ |


================================================
FILE: libs/kotaemon/tests/simple_pipeline.py
================================================
import tempfile
from typing import List

from kotaemon.base import BaseComponent, LLMInterface, lazy
from kotaemon.embeddings import LCAzureOpenAIEmbeddings
from kotaemon.indices import VectorRetrieval
from kotaemon.llms import AzureOpenAI
from kotaemon.storages import ChromaVectorStore


class Pipeline(BaseComponent):
    # Small retrieve-then-generate component: `run` sends the query through
    # `retrieving_pipeline` and hands the newline-joined result to `llm`.
    # Every endpoint and key below is a placeholder value.

    # Completion model node.
    llm: AzureOpenAI = AzureOpenAI.withx(
        azure_endpoint="https://test.openai.azure.com/",
        openai_api_key="some-key",
        openai_api_version="2023-03-15-preview",
        deployment_name="gpt35turbo",
        temperature=0,
        request_timeout=60,
    )

    # Retrieval node. `tempfile.mkdtemp()` is evaluated once, when the class
    # body executes, so the vector-store path is fixed at definition time.
    retrieving_pipeline: VectorRetrieval = VectorRetrieval.withx(
        vector_store=lazy(ChromaVectorStore).withx(path=str(tempfile.mkdtemp())),
        embedding=LCAzureOpenAIEmbeddings.withx(
            model="text-embedding-ada-002",
            deployment="embedding-deployment",
            azure_endpoint="https://test.openai.azure.com/",
            openai_api_key="some-key",
        ),
    )

    def run(self, text: str) -> LLMInterface:
        """Retrieve entries for *text* and return the LLM output for them.

        Args:
            text: query forwarded unchanged to ``retrieving_pipeline``.

        Returns:
            Whatever ``llm`` produces for the newline-joined retrieval result.
        """
        retrieved: List[str] = self.retrieving_pipeline(text)
        prompt = "\n".join(retrieved)
        return self.llm(prompt)


================================================
FILE: libs/kotaemon/tests/test_agent.py
================================================
from unittest.mock import patch

import pytest
from openai.types.chat.chat_completion import ChatCompletion

from kotaemon.agents import (
    AgentType,
    BaseTool,
    GoogleSearchTool,
    LangchainAgent,
    LLMTool,
    ReactAgent,
    RewooAgent,
    WikipediaTool,
)
from kotaemon.llms import AzureChatOpenAI

from .conftest import skip_openai_lc_wrapper_test

# Last message of every scripted LLM conversation in this module.
FINAL_RESPONSE_TEXT = "Final Answer: Hello Cinnamon AI!"

# Planner output that starts with an evidence line ("#E1") instead of a
# "#Plan1" line.
REWOO_INVALID_PLAN = (
    "#E1: google_search[Cinnamon AI company]\n"
    "#Plan2: Search for Cinnamon on Wikipedia\n"
    "#E2: wikipedia[Cinnamon]\n"
)

# Complete planner output: the invalid plan preceded by its "#Plan1" step.
REWOO_VALID_PLAN = (
    "#Plan1: Search for Cinnamon AI company on Google\n" + REWOO_INVALID_PLAN
)


def generate_chat_completion_obj(text):
    """Wrap *text* in a ``ChatCompletion`` object with fixed metadata.

    Args:
        text: content of the single assistant message.

    Returns:
        The ``ChatCompletion`` parsed from a payload with one choice whose
        ``finish_reason`` is ``"stop"``; ids and token counts are constants.
    """
    assistant_message = {
        "role": "assistant",
        "content": text,
        "function_call": None,
        "tool_calls": None,
    }
    sole_choice = {
        "index": 0,
        "finish_reason": "stop",
        "message": assistant_message,
        "logprobs": None,
    }
    token_usage = {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19}
    payload = {
        "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
        "object": "chat.completion",
        "created": 1692338378,
        "model": "gpt-35-turbo",
        "system_fingerprint": None,
        "choices": [sole_choice],
        "usage": token_usage,
    }
    return ChatCompletion.parse_obj(payload)


# Scripted replies for the patched ``Completions.create``; each list is handed
# to ``patch(..., side_effect=...)`` so successive calls return successive items.

# ReWOO: valid plan, then the final answer.
_openai_chat_completion_responses_rewoo = [
    generate_chat_completion_obj(text=REWOO_VALID_PLAN),
    generate_chat_completion_obj(text=FINAL_RESPONSE_TEXT),
]

# ReWOO: invalid plan, then the final answer.
_openai_chat_completion_responses_rewoo_error = [
    generate_chat_completion_obj(text=REWOO_INVALID_PLAN),
    generate_chat_completion_obj(text=FINAL_RESPONSE_TEXT),
]

# ReAct: a Wikipedia action, a Google-search action, then the final answer.
_openai_chat_completion_responses_react = [
    generate_chat_completion_obj(text=reply)
    for reply in (
        (
            "I don't have prior knowledge about Cinnamon AI company, "
            "so I should gather information about it.\n"
            "Action: wikipedia\n"
            "Action Input: Cinnamon AI company\n"
        ),
        (
            "The information retrieved from Wikipedia is not "
            "about Cinnamon AI company, but about Blue Prism, "
            "a British multinational software corporation. "
            "I need to try another source to gather information "
            "about Cinnamon AI company.\n"
            "Action: google_search\n"
            "Action Input: Cinnamon AI company\n"
        ),
        FINAL_RESPONSE_TEXT,
    )
]

# ReAct with langchain-provided tools: a Wikipedia action, then the final
# answer. An intermediate "duckduckgo_search" turn existed here as
# commented-out code and is intentionally not part of the script.
_openai_chat_completion_responses_react_langchain_tool = [
    generate_chat_completion_obj(text=reply)
    for reply in (
        (
            "I don't have prior knowledge about Cinnamon AI company, "
            "so I should gather information about it.\n"
            "Action: wikipedia\n"
            "Action Input: Cinnamon AI company\n"
        ),
        FINAL_RESPONSE_TEXT,
    )
]


@pytest.fixture
def llm():
    """Return an ``AzureChatOpenAI`` configured with placeholder settings."""
    placeholder_settings = {
        "api_key": "dummy",
        "api_version": "2024-05-01-preview",
        "azure_deployment": "gpt-4o",
        "azure_endpoint": "https://test.openai.azure.com/",
    }
    return AzureChatOpenAI(**placeholder_settings)


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses_rewoo_error,
)
def test_agent_fail(openai_completion, llm, mock_google_search):
    """RewooAgent returns a falsy, ``"failed"`` result for the invalid plan."""
    tools = [GoogleSearchTool(), WikipediaTool(), LLMTool(llm=llm)]
    rewoo = RewooAgent(planner_llm=llm, solver_llm=llm, plugins=tools)

    outcome = rewoo("Tell me about Cinnamon AI company")

    # The patched completion endpoint must have been hit at least once.
    openai_completion.assert_called()
    assert not outcome
    assert outcome.status == "failed"


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses_rewoo,
)
def test_rewoo_agent(openai_completion, llm, mock_google_search):
    """RewooAgent returns the scripted final answer for the valid plan."""
    tools = [GoogleSearchTool(), WikipediaTool(), LLMTool(llm=llm)]
    rewoo = RewooAgent(planner_llm=llm, solver_llm=llm, plugins=tools)

    outcome = rewoo("Tell me about Cinnamon AI company")

    openai_completion.assert_called()
    assert outcome.text == FINAL_RESPONSE_TEXT


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses_react,
)
def test_react_agent(openai_completion, llm, mock_google_search):
    """ReactAgent ends on the scripted final answer within four iterations."""
    tools = [GoogleSearchTool(), WikipediaTool(), LLMTool(llm=llm)]
    react = ReactAgent(llm=llm, plugins=tools, max_iterations=4)

    outcome = react("Tell me about Cinnamon AI company")

    openai_completion.assert_called()
    assert outcome.text == FINAL_RESPONSE_TEXT


@skip_openai_lc_wrapper_test
@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses_react,
)
def test_react_agent_langchain(openai_completion, llm, mock_google_search):
    """A langchain zero-shot ReAct agent runs on converted kotaemon tools."""
    # Aliased so the langchain enum does not shadow the module-level
    # `AgentType` imported from `kotaemon.agents`.
    from langchain.agents import AgentType as LangchainAgentType
    from langchain.agents import initialize_agent

    tools = [GoogleSearchTool(), WikipediaTool(), LLMTool(llm=llm)]
    lc_tools = [tool.to_langchain_format() for tool in tools]
    lc_llm = llm.to_langchain_format()
    executor = initialize_agent(
        lc_tools,
        lc_llm,
        agent=LangchainAgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )

    outcome = executor("Tell me about Cinnamon AI company")

    openai_completion.assert_called()
    assert outcome


@skip_openai_lc_wrapper_test
@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses_react,
)
def test_wrapper_agent_langchain(openai_completion, llm, mock_google_search):
    """LangchainAgent of type ``react`` yields a truthy response."""
    tools = [GoogleSearchTool(), WikipediaTool(), LLMTool(llm=llm)]
    wrapper = LangchainAgent(llm=llm, plugins=tools, agent_type=AgentType.react)

    outcome = wrapper("Tell me about Cinnamon AI company")

    openai_completion.assert_called()
    assert outcome


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses_react_langchain_tool,
)
def test_react_agent_with_langchain_tools(openai_completion, llm):
    """ReactAgent works with tools converted from langchain's format."""
    from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
    from langchain_community.utilities import WikipediaAPIWrapper

    # NOTE(review): these langchain tools are not mocked in this test —
    # confirm whether running it requires network access.
    lc_tools = [
        WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
        DuckDuckGoSearchRun(),
    ]
    tools = [BaseTool.from_langchain_format(lc_tool) for lc_tool in lc_tools]
    react = ReactAgent(llm=llm, plugins=tools, max_iterations=4)

    outcome = react("Tell me about Cinnamon AI company")

    openai_completion.assert_called()
    assert outcome.text == FINAL_RESPONSE_TEXT


================================================
FILE: libs/kotaemon/tests/test_composite.py
================================================
from copy import deepcopy

import pytest
from openai.types.chat.chat_completion import ChatCompletion

from kotaemon.llms import (
    AzureChatOpenAI,
    BasePromptComponent,
    GatedBranchingPipeline,
    GatedLinearPipeline,
    SimpleBranchingPipeline,
    SimpleLinearPipeline,
)
from kotaemon.parsers import RegexExtractor

# Canned chat-completion payload handed back by the patched OpenAI client in the
# tests below. The pipeline tests in this module assert that "123" is what comes
# out of the r"\d+" post-processor for this content.
_openai_chat_completion_response = ChatCompletion.parse_obj(
    {
        "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
        "object": "chat.completion",
        "created": 1692338378,
        "model": "gpt-35-turbo",
        "system_fingerprint": None,
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {
                    "role": "assistant",
                    "content": "This is a test 123",
                    # NOTE(review): "finish_reason" and "logprobs" are repeated
                    # inside "message" here; presumably tolerated as extra
                    # fields by the model - confirm they are intentional.
                    "finish_reason": "length",
                    "logprobs": None,
                },
                "logprobs": None,
            }
        ],
        "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
    }
)


@pytest.fixture
def mock_llm():
    """Provide an AzureChatOpenAI client configured with dummy credentials."""
    azure_settings = {
        "api_key": "dummy",
        "api_version": "2024-05-01-preview",
        "azure_deployment": "gpt-4o",
        "azure_endpoint": "https://test.openai.azure.com/",
    }
    return AzureChatOpenAI(**azure_settings)


@pytest.fixture
def mock_post_processor():
    """Provide a RegexExtractor built with the digit-run pattern."""
    digits_extractor = RegexExtractor(pattern=r"\d+")
    return digits_extractor


@pytest.fixture
def mock_prompt():
    """Provide a prompt component with a single ``{value}`` placeholder."""
    prompt_component = BasePromptComponent(template="Test prompt {value}")
    return prompt_component


@pytest.fixture
def mock_simple_linear_pipeline(mock_prompt, mock_llm, mock_post_processor):
    """Assemble a SimpleLinearPipeline from the prompt, LLM and post-processor fixtures."""
    stages = {
        "prompt": mock_prompt,
        "llm": mock_llm,
        "post_processor": mock_post_processor,
    }
    return SimpleLinearPipeline(**stages)


@pytest.fixture
def mock_gated_linear_pipeline_positive(mock_prompt, mock_llm, mock_post_processor):
    """Assemble a GatedLinearPipeline whose condition pattern is "positive"."""
    gate = RegexExtractor(pattern="positive")
    return GatedLinearPipeline(
        prompt=mock_prompt,
        llm=mock_llm,
        post_processor=mock_post_processor,
        condition=gate,
    )


@pytest.fixture
def mock_gated_linear_pipeline_negative(mock_prompt, mock_llm, mock_post_processor):
    """Assemble a GatedLinearPipeline whose condition pattern is "negative"."""
    gate = RegexExtractor(pattern="negative")
    return GatedLinearPipeline(
        prompt=mock_prompt,
        llm=mock_llm,
        post_processor=mock_post_processor,
        condition=gate,
    )


def test_simple_linear_pipeline_run(mocker, mock_simple_linear_pipeline):
    """The linear pipeline calls the LLM once and post-processes its reply."""
    completion_mock = mocker.patch(
        "openai.resources.chat.completions.Completions.create",
        return_value=_openai_chat_completion_response,
    )

    output = mock_simple_linear_pipeline(value="abc")

    assert output.text == "123"
    assert completion_mock.call_count == 1


def test_gated_linear_pipeline_run_positive(
    mocker, mock_gated_linear_pipeline_positive
):
    """With condition text containing "positive", the gated pipeline runs the LLM."""
    completion_mock = mocker.patch(
        "openai.resources.chat.completions.Completions.create",
        return_value=_openai_chat_completion_response,
    )
    call_kwargs = {"value": "abc", "condition_text": "positive condition"}

    output = mock_gated_linear_pipeline_positive(**call_kwargs)

    assert output.text == "123"
    assert completion_mock.call_count == 1


def test_gated_linear_pipeline_run_negative(
    mocker, mock_gated_linear_pipeline_positive
):
    """The "positive"-gated pipeline must not call the LLM for non-matching text.

    The positive-gated fixture is used on purpose: the condition text passed
    below does not contain "positive", and the test expects empty content and
    zero completion calls.
    """
    completion_mock = mocker.patch(
        "openai.resources.chat.completions.Completions.create",
        return_value=_openai_chat_completion_response,
    )
    call_kwargs = {"value": "abc", "condition_text": "negative condition"}

    output = mock_gated_linear_pipeline_positive(**call_kwargs)

    assert output.content is None
    assert completion_mock.call_count == 0


def test_simple_branching_pipeline_run(mocker, mock_simple_linear_pipeline):
    """Three identical branches each consume one canned completion, in order."""

    def completion_with(content: str) -> ChatCompletion:
        # Copy the shared payload so the module-level response stays untouched.
        clone: ChatCompletion = deepcopy(_openai_chat_completion_response)
        clone.choices[0].message.content = content
        return clone

    canned_replies = [
        _openai_chat_completion_response,
        completion_with("a quick brown fox"),
        completion_with("jumps over the lazy dog 456"),
    ]
    completion_mock = mocker.patch(
        "openai.resources.chat.completions.Completions.create",
        side_effect=canned_replies,
    )

    branching = SimpleBranchingPipeline()
    for _ in range(3):
        branching.add_branch(mock_simple_linear_pipeline)

    outputs = branching.run(value="abc")
    extracted = [output.text for output in outputs]

    assert len(outputs) == 3
    # The second reply contains no digits, so its extracted text is empty.
    assert extracted == ["123", "", "456"]
    assert completion_mock.call_count == 3


def test_simple_gated_branching_pipeline_run(
    mocker, mock_gated_linear_pipeline_positive, mock_gated_linear_pipeline_negative
):
    """Gated branching over one negative-gated and two positive-gated branches."""
    digitless_reply: ChatCompletion = deepcopy(_openai_chat_completion_response)
    digitless_reply.choices[0].message.content = "a quick brown fox"
    completion_mock = mocker.patch(
        "openai.resources.chat.completions.Completions.create",
        return_value=digitless_reply,
    )

    gated = GatedBranchingPipeline()
    branches = (
        mock_gated_linear_pipeline_negative,
        mock_gated_linear_pipeline_positive,
        mock_gated_linear_pipeline_positive,
    )
    for branch in branches:
        gated.add_branch(branch)

    output = gated.run(value="abc", condition_text="positive condition")

    assert output.text == ""
    assert completion_mock.call_count == 2


================================================
FILE: libs/kotaemon/tests/test_cot.py
================================================
from unittest.mock import patch

from openai.types.chat.chat_completion import ChatCompletion

from kotaemon.llms import AzureChatOpenAI
from kotaemon.llms.cot import ManualSequentialChainOfThought, Thought

# Two canned chat completions, identical except for the assistant content
# ("Bonjour", then the Japanese greeting). The tests below pass this list as the
# `side_effect` of the patched completion endpoint.
_openai_chat_completion_response = [
    ChatCompletion.parse_obj(
        {
            "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
            "object": "chat.completion",
            "created": 1692338378,
            "model": "gpt-35-turbo",
            "system_fingerprint": None,
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "stop",
                    "message": {
                        "role": "assistant",
                        # Filled in from the comprehension variable below.
                        "content": text,
                        "function_call": None,
                        "tool_calls": None,
                    },
                    "logprobs": None,
                }
            ],
            "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
        }
    )
    for text in ["Bonjour", "こんにちは (Konnichiwa)"]
]


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_response,
)
def test_cot_plus_operator(openai_completion):
    """Chaining two Thoughts with ``+`` merges inputs and both post-processed outputs."""
    azure_settings = {
        "api_key": "dummy",
        "api_version": "2024-05-01-preview",
        "azure_deployment": "gpt-4o",
        "azure_endpoint": "https://test.openai.azure.com/",
    }
    llm = AzureChatOpenAI(**azure_settings)

    to_french = Thought(
        prompt="Word {word} in {language} is ",
        llm=llm,
        post_process=lambda reply: {"translated": reply},
    )
    to_japanese = Thought(
        prompt="Translate {translated} to Japanese",
        llm=llm,
        post_process=lambda reply: {"output": reply},
    )
    chain = to_french + to_japanese

    result = chain(word="hello", language="French")

    expected = {
        "word": "hello",
        "language": "French",
        "translated": "Bonjour",
        "output": "こんにちは (Konnichiwa)",
    }
    assert result.content == expected


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_response,
)
def test_cot_manual(openai_completion):
    """ManualSequentialChainOfThought runs LLM-less Thoughts with a shared LLM."""
    azure_settings = {
        "api_key": "dummy",
        "api_version": "2024-05-01-preview",
        "azure_deployment": "gpt-4o",
        "azure_endpoint": "https://test.openai.azure.com/",
    }
    llm = AzureChatOpenAI(**azure_settings)

    to_french = Thought(
        prompt="Word {word} in {language} is ",
        post_process=lambda reply: {"translated": reply},
    )
    to_japanese = Thought(
        prompt="Translate {translated} to Japanese",
        post_process=lambda reply: {"output": reply},
    )
    chain = ManualSequentialChainOfThought(thoughts=[to_french, to_japanese], llm=llm)

    result = chain(word="hello", language="French")

    expected = {
        "word": "hello",
        "language": "French",
        "translated": "Bonjour",
        "output": "こんにちは (Konnichiwa)",
    }
    assert result.content == expected


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_response,
)
def test_cot_with_termination_callback(openai_completion):
    """The chain stops after the first Thought once ``terminate`` returns True."""
    azure_settings = {
        "api_key": "dummy",
        "api_version": "2024-05-01-preview",
        "azure_deployment": "gpt-4o",
        "azure_endpoint": "https://test.openai.azure.com/",
    }
    llm = AzureChatOpenAI(**azure_settings)

    to_french = Thought(
        prompt="Word {word} in {language} is ",
        post_process=lambda reply: {"translated": reply},
    )
    to_japanese = Thought(
        prompt="Translate {translated} to Japanese",
        post_process=lambda reply: {"output": reply},
    )
    chain = ManualSequentialChainOfThought(
        thoughts=[to_french, to_japanese],
        llm=llm,
        # Stop as soon as the accumulated state holds the French translation.
        terminate=lambda state: state.get("translated", "") == "Bonjour",
    )

    result = chain(word="hallo", language="French")

    # No "output" key: the second Thought is not expected to contribute.
    expected = {
        "word": "hallo",
        "language": "French",
        "translated": "Bonjour",
    }
    assert result.content == expected


================================================
FILE: libs/kotaemon/tests/test_docstores.py
================================================
import os
from unittest.mock import patch

import pytest
from elastic_transport import ApiResponseMeta

from kotaemon.base import Document
from kotaemon.storages import (
    ElasticsearchDocumentStore,
    InMemoryDocumentStore,
    SimpleFileDocumentStore,
)

# Response metadata with HTTP status 200, paired with the bodies of the canned
# Elasticsearch replies in `_elastic_search_responses`.
# NOTE(review): the "x-elastic-product" header is presumably what lets the
# client accept the mocked reply as coming from Elasticsearch - confirm.
meta_success = ApiResponseMeta(
    status=200,
    http_version="1.1",
    headers={"x-elastic-product": "Elasticsearch"},
    duration=1.0,
    node=None,
)
# Response metadata with HTTP status 404; used for the first canned reply.
meta_fail = ApiResponseMeta(
    status=404,
    http_version="1.1",
    headers={"x-elastic-product": "Elasticsearch"},
    duration=1.0,
    node=None,
)
# Canned (meta, body) pairs used as the `side_effect` of the patched
# `elastic_transport.Transport.perform_request` in `test_elastic_document_store`.
# A list side effect is consumed one item per call, so the order here has to
# follow the order of the requests that test triggers.
_elastic_search_responses = [
    # check exist (404 metadata, no body)
    (meta_fail, None),
    # create index
    (
        meta_success,
        {"acknowledged": True, "shards_acknowledged": True, "index": "test"},
    ),
    # count API (reports "0" documents)
    (
        meta_success,
        [{"epoch": "1700474422", "timestamp": "10:00:22", "count": "0"}],
    ),
    # add documents (three items created)
    (
        meta_success,
        {
            "took": 50,
            "errors": False,
            "items": [
                {
                    "index": {
                        "_index": "test",
                        "_id": "a3774dab-b8f1-43ba-adb8-842cb7a76eeb",
                        "_version": 1,
                        "result": "created",
                        "_shards": {"total": 2, "successful": 1, "failed": 0},
                        "_seq_no": 0,
                        "_primary_term": 1,
                        "status": 201,
                    }
                },
                {
                    "index": {
                        "_index": "test",
                        "_id": "b44f5593-7587-4f91-afd0-5736e5bd5bfe",
                        "_version": 1,
                        "result": "created",
                        "_shards": {"total": 2, "successful": 1, "failed": 0},
                        "_seq_no": 1,
                        "_primary_term": 1,
                        "status": 201,
                    }
                },
                {
                    "index": {
                        "_index": "test",
                        "_id": "13ae7825-eef9-4214-a164-983c2e6bbeaa",
                        "_version": 1,
                        "result": "created",
                        "_shards": {"total": 2, "successful": 1, "failed": 0},
                        "_seq_no": 2,
                        "_primary_term": 1,
                        "status": 201,
                    }
                },
            ],
        },
    ),
    # check exist
    (
        meta_success,
        {"_shards": {"total": 2, "successful": 1, "failed": 0}},
    ),
    # count (reports "3" documents)
    (
        meta_success,
        [{"epoch": "1700474422", "timestamp": "10:00:22", "count": "3"}],
    ),
    # get_all (three hits)
    (
        meta_success,
        {
            "took": 1,
            "timed_out": False,
            "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
            "hits": {
                "total": {"value": 3, "relation": "eq"},
                "max_score": 1.0,
                "hits": [
                    {
                        "_index": "test",
                        "_id": "a3774dab-b8f1-43ba-adb8-842cb7a76eeb",
                        "_score": 1.0,
                        "_source": {"content": "Sample text 0", "metadata": {}},
                    },
                    {
                        "_index": "test",
                        "_id": "b44f5593-7587-4f91-afd0-5736e5bd5bfe",
                        "_score": 1.0,
                        "_source": {"content": "Sample text 1", "metadata": {}},
                    },
                    {
                        "_index": "test",
                        "_id": "13ae7825-eef9-4214-a164-983c2e6bbeaa",
                        "_score": 1.0,
                        "_source": {"content": "Sample text 2", "metadata": {}},
                    },
                ],
            },
        },
    ),
    # get by-id (single hit)
    (
        meta_success,
        {
            "took": 1,
            "timed_out": False,
            "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
            "hits": {
                "total": {"value": 1, "relation": "eq"},
                "max_score": 1.0,
                "hits": [
                    {
                        "_index": "test",
                        "_id": "a3774dab-b8f1-43ba-adb8-842cb7a76eeb",
                        "_score": 1.0,
                        "_source": {"content": "Sample text 0", "metadata": {}},
                    }
                ],
            },
        },
    ),
    # query (three hits)
    (
        meta_success,
        {
            "took": 2,
            "timed_out": False,
            "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
            "hits": {
                "total": {"value": 3, "relation": "eq"},
                "max_score": 0.13353139,
                "hits": [
                    {
                        "_index": "test",
                        "_id": "a3774dab-b8f1-43ba-adb8-842cb7a76eeb",
                        "_score": 0.13353139,
                        "_source": {"content": "Sample text 0", "metadata": {}},
                    },
                    {
                        "_index": "test",
                        "_id": "b44f5593-7587-4f91-afd0-5736e5bd5bfe",
                        "_score": 0.13353139,
                        "_source": {"content": "Sample text 1", "metadata": {}},
                    },
                    {
                        "_index": "test",
                        "_id": "13ae7825-eef9-4214-a164-983c2e6bbeaa",
                        "_score": 0.13353139,
                        "_source": {"content": "Sample text 2", "metadata": {}},
                    },
                ],
            },
        },
    ),
    # delete (one document deleted)
    (
        meta_success,
        {
            "took": 10,
            "timed_out": False,
            "total": 1,
            "deleted": 1,
            "batches": 1,
            "version_conflicts": 0,
            "noops": 0,
            "retries": {"bulk": 0, "search": 0},
            "throttled_millis": 0,
            "requests_per_second": -1.0,
            "throttled_until_millis": 0,
            "failures": [],
        },
    ),
    # check exists
    (
        meta_success,
        {"_shards": {"total": 2, "successful": 1, "failed": 0}},
    ),
    # count (reports "2" documents)
    (
        meta_success,
        [{"epoch": "1700549363", "timestamp": "06:49:23", "count": "2"}],
    ),
]


def test_inmemory_document_store_base_interfaces(tmp_path):
    """Exercise add, get, delete, save and load on an InMemoryDocumentStore."""

    def explicit_ids():
        # A fresh list per call, matching the ids passed to each add() below.
        return [f"doc_{idx}" for idx in range(10)]

    def stored_count(target):
        return len(target.get_all())

    store = InMemoryDocumentStore()
    docs = [
        Document(text=f"Sample text {idx}", meta={"meta_key": f"meta_value_{idx}"})
        for idx in range(10)
    ]
    first, second, third = docs[0], docs[1], docs[2]

    # Add with the documents' own ids.
    assert stored_count(store) == 0, "Document store should be empty"
    store.add(docs)
    assert stored_count(store) == 10, "Document store should have 10 documents"

    # Add the same documents again under explicit ids.
    store.add(docs=docs, ids=explicit_ids())
    assert stored_count(store) == 20, "Document store should have 20 documents"

    # Re-using existing ids without exist_ok is rejected.
    with pytest.raises(ValueError):
        store.add(docs=docs, ids=explicit_ids())

    # With exist_ok the add succeeds and the count is unchanged.
    store.add(docs=docs, ids=explicit_ids(), exist_ok=True)
    assert stored_count(store) == 20, "Document store should have 20 documents"

    # Lookup by a single id.
    single = store.get(first.doc_id)
    assert len(single) == 1, "Should return 1 document"
    assert single[0].text == first.text, "Should return the correct document"

    # Lookup by a list of ids.
    pair = store.get([first.doc_id, second.doc_id])
    assert len(pair) == 2, "Should return 2 documents"
    assert [doc.text for doc in pair] == [doc.text for doc in docs[:2]]

    # Delete by a single id.
    store.delete(first.doc_id)
    assert stored_count(store) == 19, "Document store should have 19 documents"

    # Delete by a list of ids.
    store.delete([second.doc_id, third.doc_id])
    assert stored_count(store) == 17, "Document store should have 17 documents"

    # Save to disk.
    store.save(tmp_path / "store.json")
    assert (tmp_path / "store.json").exists(), "File should exist"

    # Load into a second store.
    reloaded = InMemoryDocumentStore()
    reloaded.load(tmp_path / "store.json")
    assert stored_count(reloaded) == 17, "Laded document store should have 17 documents"

    os.remove(tmp_path / "store.json")


def test_simplefile_document_store_base_interfaces(tmp_path):
    """Exercise add, get and delete on a SimpleFileDocumentStore and reload it."""

    def explicit_ids():
        # A fresh list per call, matching the ids passed to each add() below.
        return [f"doc_{idx}" for idx in range(10)]

    def stored_count(target):
        return len(target.get_all())

    store = SimpleFileDocumentStore(path=tmp_path)
    docs = [
        Document(text=f"Sample text {idx}", meta={"meta_key": f"meta_value_{idx}"})
        for idx in range(10)
    ]
    first, second, third = docs[0], docs[1], docs[2]

    # Add with the documents' own ids.
    assert stored_count(store) == 0, "Document store should be empty"
    store.add(docs)
    assert stored_count(store) == 10, "Document store should have 10 documents"

    # Add the same documents again under explicit ids.
    store.add(docs=docs, ids=explicit_ids())
    assert stored_count(store) == 20, "Document store should have 20 documents"

    # Re-using existing ids without exist_ok is rejected.
    with pytest.raises(ValueError):
        store.add(docs=docs, ids=explicit_ids())

    # With exist_ok the add succeeds and the count is unchanged.
    store.add(docs=docs, ids=explicit_ids(), exist_ok=True)
    assert stored_count(store) == 20, "Document store should have 20 documents"

    # Lookup by a single id.
    single = store.get(first.doc_id)
    assert len(single) == 1, "Should return 1 document"
    assert single[0].text == first.text, "Should return the correct document"

    # Lookup by a list of ids.
    pair = store.get([first.doc_id, second.doc_id])
    assert len(pair) == 2, "Should return 2 documents"
    assert [doc.text for doc in pair] == [doc.text for doc in docs[:2]]

    # Delete by a single id.
    store.delete(first.doc_id)
    assert stored_count(store) == 19, "Document store should have 19 documents"

    # Delete by a list of ids.
    store.delete([second.doc_id, third.doc_id])
    assert stored_count(store) == 17, "Document store should have 17 documents"

    # No explicit save() call: the backing file is expected to exist already.
    assert (tmp_path / "default.json").exists(), "File should exist"

    # A second store opened on the same path sees the same documents.
    reopened = SimpleFileDocumentStore(path=tmp_path)
    assert stored_count(reopened) == 17, "Laded document store should have 17 documents"

    os.remove(tmp_path / "default.json")


@patch(
    "elastic_transport.Transport.perform_request",
    side_effect=_elastic_search_responses,
)
def test_elastic_document_store(elastic_api):
    """Walk ElasticsearchDocumentStore through count/add/get/query/delete on canned replies."""
    store = ElasticsearchDocumentStore(collection_name="test")

    new_docs = [
        Document(text=f"Sample text {idx}", meta={"meta_key": f"meta_value_{idx}"})
        for idx in range(3)
    ]

    # Count before and after adding.
    assert store.count() == 0, "Document store should be empty"
    store.add(new_docs)
    assert store.count() == 3, "Document store count should changed after adding docs"

    # Fetch everything, then fetch the first document back by id.
    stored_docs = store.get_all()
    first_doc = stored_docs[0]
    assert len(stored_docs) == 3, "Document store get_all() failed"

    fetched = store.get(first_doc.doc_id)
    assert fetched[0].doc_id == first_doc.doc_id, "Document store get() failed"

    # Text query.
    hits = store.query("text")
    assert len(hits) == 3, "Document store query() failed"

    # Delete one document and count again.
    store.delete(first_doc.doc_id)
    assert store.count() == 2, "Document store delete() failed"

    elastic_api.assert_called()


================================================
FILE: libs/kotaemon/tests/test_documents.py
================================================
from kotaemon.base.schema import Document, RetrievedDocument

from .conftest import skip_when_haystack_not_installed


def test_document_constructor_with_builtin_types():
    """Document wraps arbitrary builtin values; falsy values give empty text."""
    # NOTE(review): `tuple` here is the type object itself, not an empty tuple
    # (`tuple()`); confirm whether an empty tuple was intended.
    samples = ["str", 1, {}, set(), [], tuple, None]
    for sample in samples:
        doc = Document(sample)
        expected_text = str(sample) if sample else ""
        assert doc.text == expected_text
        assert doc.content == sample
        assert bool(doc) == bool(sample)


def test_document_constructor_with_document():
    """Building a Document from another Document carries over text and content."""
    original = Document("Sample text")
    clone = Document(original)
    assert clone.text == original.text
    assert clone.content == original.content


@skip_when_haystack_not_installed
def test_document_to_haystack_format():
    """Converting to haystack format keeps the text and the metadata."""
    from haystack.schema import Document as HaystackDocument

    metadata = {"filename": "sample.txt"}
    doc = Document("Sample text", metadata=metadata)

    converted = doc.to_haystack_format()

    assert isinstance(converted, HaystackDocument)
    assert converted.content == doc.text
    assert converted.meta == metadata


def test_retrieved_document_default_values():
    """A RetrievedDocument built from text only has score 0.0 and empty metadata."""
    body = "text"
    doc = RetrievedDocument(text=body)
    assert doc.text == body
    assert doc.score == 0.0
    assert doc.retrieval_metadata == {}


def test_retrieved_document_attributes():
    """Explicit score and retrieval metadata are stored on the RetrievedDocument."""
    fields = {
        "text": "text",
        "score": 0.8,
        "retrieval_metadata": {"source": "retrieval_system"},
    }
    doc = RetrievedDocument(**fields)
    assert doc.text == fields["text"]
    assert doc.score == fields["score"]
    assert doc.retrieval_metadata == fields["retrieval_metadata"]


================================================
FILE: libs/kotaemon/tests/test_embedding_models.py
================================================
import json
from pathlib import Path
from unittest.mock import Mock, patch

from openai.types.create_embedding_response import CreateEmbeddingResponse

from kotaemon.base import Document, DocumentWithEmbedding
from kotaemon.embeddings import (
    AzureOpenAIEmbeddings,
    FastEmbedEmbeddings,
    LCCohereEmbeddings,
    LCHuggingFaceEmbeddings,
    OpenAIEmbeddings,
    VoyageAIEmbeddings,
)

from .conftest import (
    skip_when_cohere_not_installed,
    skip_when_fastembed_not_installed,
    skip_when_sentence_bert_not_installed,
    skip_when_voyageai_not_installed,
)

# Canned OpenAI embedding responses (batch and single input) shared by the
# tests below. The encoding is passed explicitly so that reading the JSON
# fixtures does not depend on the platform's locale default.
with open(
    Path(__file__).parent / "resources" / "embedding_openai_batch.json",
    encoding="utf-8",
) as f:
    openai_embedding_batch = CreateEmbeddingResponse.model_validate(json.load(f))

with open(
    Path(__file__).parent / "resources" / "embedding_openai.json",
    encoding="utf-8",
) as f:
    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))


def assert_embedding_result(output):
    """Assert that ``output`` is a list whose first item is an embedded Document.

    Only the first element is inspected: it must be a ``Document`` whose
    ``embedding`` is a list starting with a float.
    """
    assert isinstance(output, list)
    head = output[0]
    assert isinstance(head, Document)
    vector = head.embedding
    assert isinstance(vector, list)
    assert isinstance(vector[0], float)


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_azureopenai_embeddings_raw(openai_embedding_call):
    """Embed a single string with AzureOpenAIEmbeddings against a canned reply."""
    azure_settings = {
        "azure_deployment": "embedding-deployment",
        "azure_endpoint": "https://test.openai.azure.com/",
        "api_key": "some-key",
        "api_version": "version",
    }
    embedder = AzureOpenAIEmbeddings(**azure_settings)

    embedded = embedder("Hello world")

    assert_embedding_result(embedded)
    openai_embedding_call.assert_called()


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding_batch,
)
def test_lcazureopenai_embeddings_batch_raw(openai_embedding_call):
    """Embed a two-item batch with AzureOpenAIEmbeddings against a canned reply.

    Despite the ``lc`` prefix in the name, the model instantiated here is
    ``AzureOpenAIEmbeddings``.
    """
    azure_settings = {
        "azure_deployment": "embedding-deployment",
        "azure_endpoint": "https://test.openai.azure.com/",
        "api_key": "some-key",
        "api_version": "version",
    }
    embedder = AzureOpenAIEmbeddings(**azure_settings)

    embedded = embedder(["Hello world", "Goodbye world"])

    assert_embedding_result(embedded)
    openai_embedding_call.assert_called()


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding_batch,
)
def test_azureopenai_embeddings_batch_raw(openai_embedding_call):
    """Embed a two-item batch with AzureOpenAIEmbeddings against a canned reply."""
    azure_settings = {
        "azure_deployment": "text-embedding-ada-002",
        "azure_endpoint": "https://test.openai.azure.com/",
        "api_key": "some-key",
        "api_version": "version",
    }
    embedder = AzureOpenAIEmbeddings(**azure_settings)

    embedded = embedder(["Hello world", "Goodbye world"])

    assert_embedding_result(embedded)
    openai_embedding_call.assert_called()


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_openai_embeddings_raw(openai_embedding_call):
    """Embed a single string with OpenAIEmbeddings against a canned reply."""
    embedder = OpenAIEmbeddings(api_key="some-key", model="text-embedding-ada-002")

    embedded = embedder("Hello world")

    assert_embedding_result(embedded)
    openai_embedding_call.assert_called()


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding_batch,
)
def test_openai_embeddings_batch_raw(openai_embedding_call):
    """Embed a two-item batch with OpenAIEmbeddings against a canned reply."""
    embedder = OpenAIEmbeddings(api_key="some-key", model="text-embedding-ada-002")

    embedded = embedder(["Hello world", "Goodbye world"])

    assert_embedding_result(embedded)
    openai_embedding_call.assert_called()


@skip_when_sentence_bert_not_installed
@patch(
    "sentence_transformers.SentenceTransformer",
    side_effect=lambda *args, **kwargs: None,
)
@patch(
    "langchain.embeddings.huggingface.HuggingFaceBgeEmbeddings.embed_documents",
    side_effect=lambda *args, **kwargs: [[1.0, 2.1, 3.2]],
)
def test_lchuggingface_embeddings(
    langchain_huggingface_embedding_call, sentence_transformers_init
):
    """Embed one string through LCHuggingFaceEmbeddings with both backends mocked.

    The mocks arrive bottom-up: first the ``embed_documents`` patch, then the
    ``SentenceTransformer`` patch.
    """
    hf_settings = {
        "model_name": "intfloat/multilingual-e5-large",
        "model_kwargs": {"device": "cpu"},
        "encode_kwargs": {"normalize_embeddings": False},
    }
    embedder = LCHuggingFaceEmbeddings(**hf_settings)

    embedded = embedder("Hello World")

    assert_embedding_result(embedded)
    sentence_transformers_init.assert_called()
    langchain_huggingface_embedding_call.assert_called()


@skip_when_cohere_not_installed
@patch(
    "langchain_cohere.CohereEmbeddings.embed_documents",
    side_effect=lambda *args, **kwargs: [[1.0, 2.1, 3.2]],
)
def test_lccohere_embeddings(langchain_cohere_embedding_call):
    """Embed one string through LCCohereEmbeddings with the backend call mocked."""
    cohere_settings = {
        "model": "embed-english-light-v2.0",
        "cohere_api_key": "my-api-key",
        "user_agent": "test",
    }
    embedder = LCCohereEmbeddings(**cohere_settings)

    embedded = embedder("Hello World")

    assert_embedding_result(embedded)
    langchain_cohere_embedding_call.assert_called()


@skip_when_fastembed_not_installed
def test_fastembed_embeddings():
    """Embed one string with a default-configured FastEmbedEmbeddings (no mocks)."""
    embedder = FastEmbedEmbeddings()
    embedded = embedder("Hello World")
    assert_embedding_result(embedded)


# Stand-in for the object returned by the patched VoyageAI `embed` calls: only
# its `embeddings` attribute is configured.
voyage_output_mock = Mock(embeddings=[[1.0, 2.1, 3.2]])


@skip_when_voyageai_not_installed
@patch("voyageai.Client.embed", return_value=voyage_output_mock)
@patch("voyageai.AsyncClient.embed", return_value=voyage_output_mock)
def test_voyageai_embeddings(async_call, sync_call):
    """Embed one string with VoyageAIEmbeddings while both client calls are mocked.

    Stacked ``patch`` decorators inject their mocks bottom-up, so the first
    argument is the ``AsyncClient.embed`` mock and the second one is the
    ``Client.embed`` mock; the parameter names follow that order.
    """
    model = VoyageAIEmbeddings(api_key="test")
    output = model("Hello, world!")
    assert all(isinstance(doc, DocumentWithEmbedding) for doc in output)


================================================
FILE: libs/kotaemon/tests/test_indexing_retrieval.py
================================================
import json
from pathlib import Path
from typing import cast
from unittest.mock import patch

from openai.types.create_embedding_response import CreateEmbeddingResponse

from kotaemon.base import Document
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.indices import VectorIndexing, VectorRetrieval
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore

# Canned OpenAI embedding response shared by the tests below. The encoding is
# passed explicitly so that reading the JSON fixture does not depend on the
# platform's locale default.
with open(
    Path(__file__).parent / "resources" / "embedding_openai.json",
    encoding="utf-8",
) as f:
    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_indexing(openai_embedding_call, tmp_path):
    """Index one document and check that both stores receive exactly one entry.

    The ``patch`` decorator passes its mock as the first positional argument,
    so the signature must declare it before any pytest fixture. Without that
    parameter the mock is bound to ``tmp_path`` and the vector store is
    created under ``str(MagicMock)`` instead of pytest's temporary directory.
    """
    db = ChromaVectorStore(path=str(tmp_path))
    doc_store = InMemoryDocumentStore()
    embedding = AzureOpenAIEmbeddings(
        azure_deployment="text-embedding-ada-002",
        azure_endpoint="https://test.openai.azure.com/",
        api_key="some-key",
        api_version="version",
    )

    pipeline = VectorIndexing(vector_store=db, embedding=embedding, doc_store=doc_store)
    # The casts only narrow the static types so the private attributes below
    # can be inspected; they do nothing at runtime.
    pipeline.doc_store = cast(InMemoryDocumentStore, pipeline.doc_store)
    pipeline.vector_store = cast(ChromaVectorStore, pipeline.vector_store)
    assert pipeline.vector_store._collection.count() == 0, "Expected empty collection"
    assert len(pipeline.doc_store._store) == 0, "Expected empty doc store"
    pipeline(text=Document(text="Hello world"))
    assert pipeline.vector_store._collection.count() == 1, "Index 1 item"
    assert len(pipeline.doc_store._store) == 1, "Expected 1 document"
    openai_embedding_call.assert_called()

@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_retrieving(openai_embedding_call, tmp_path):
    """Index one document, retrieve it twice and expect identical results.

    The ``patch`` decorator passes its mock as the first positional argument,
    so the signature must declare it before any pytest fixture. Without that
    parameter the mock is bound to ``tmp_path`` and the vector store is
    created under ``str(MagicMock)`` instead of pytest's temporary directory.
    """
    db = ChromaVectorStore(path=str(tmp_path))
    doc_store = InMemoryDocumentStore()
    embedding = AzureOpenAIEmbeddings(
        azure_deployment="text-embedding-ada-002",
        azure_endpoint="https://test.openai.azure.com/",
        api_key="some-key",
        api_version="version",
    )

    # Indexing and retrieval share the same vector store, doc store and embedder.
    index_pipeline = VectorIndexing(
        vector_store=db, embedding=embedding, doc_store=doc_store
    )
    retrieval_pipeline = VectorRetrieval(
        vector_store=db, doc_store=doc_store, embedding=embedding
    )

    index_pipeline(text=Document(text="Hello world"))
    output = retrieval_pipeline(text="Hello world")
    output1 = retrieval_pipeline(text="Hello world")

    assert len(output) == 1, "Expect 1 results"
    assert output == output1, "Expect identical results"
    openai_embedding_call.assert_called()


================================================
FILE: libs/kotaemon/tests/test_ingestor.py
================================================
from pathlib import Path

from kotaemon.indices.ingests import DocumentIngestor
from kotaemon.indices.splitters import TokenSplitter


def test_ingestor_include_src():
    """Ingest the sample table PDF; expect a list whose first node has relationships."""
    pdf_path = Path(__file__).parent / "resources" / "table.pdf"
    splitter = TokenSplitter(chunk_size=200, chunk_overlap=10)
    ingestor = DocumentIngestor(pdf_mode="normal", text_splitter=splitter)

    nodes = ingestor(pdf_path)

    assert type(nodes) is list
    assert nodes[0].relationships


================================================
FILE: libs/kotaemon/tests/test_llms_chat_models.py
================================================
from pathlib import Path
from unittest.mock import patch

import pytest

from kotaemon.base.schema import AIMessage, HumanMessage, LLMInterface, SystemMessage
from kotaemon.llms import AzureChatOpenAI, LlamaCppChat

# NOTE(review): both branches are `pass`, so this try/except does nothing.
# It looks like the remnant of a removed optional import; confirm and delete.
try:
    pass
except ImportError:
    pass

from openai.types.chat.chat_completion import ChatCompletion

from .conftest import skip_llama_cpp_not_installed

# Canned chat-completion payload returned by the patched
# `Completions.create` in the tests below (one assistant choice, "stop").
_openai_chat_completion_response = ChatCompletion.parse_obj(
    {
        "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
        "object": "chat.completion",
        "created": 1692338378,
        "model": "gpt-35-turbo",
        "system_fingerprint": None,
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {
                    "role": "assistant",
                    "content": "Hello! How can I assist you today?",
                    "function_call": None,
                    "tool_calls": None,
                },
                "logprobs": None,
            }
        ],
        "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
    }
)


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
)
def test_azureopenai_model(openai_completion):
    """Call AzureChatOpenAI with a string and with a message list.

    Args:
        openai_completion: mock injected by ``@patch`` in place of the OpenAI
            chat-completions ``create`` method; it returns the canned
            response defined at module level.
    """
    model = AzureChatOpenAI(
        api_key="dummy",
        api_version="2024-05-01-preview",
        azure_deployment="gpt-4o",
        azure_endpoint="https://test.openai.azure.com/",
    )
    # test for str input
    output = model("hello world")
    assert isinstance(
        output, LLMInterface
    ), "Output for single text is not LLMInterface"
    openai_completion.assert_called()

    # Forget the first call; otherwise the assert_called() at the end would
    # pass even if the second invocation never reached the client.
    # reset_mock() keeps the configured side_effect.
    openai_completion.reset_mock()

    # test for list[message] input
    messages = [
        SystemMessage(content="You are a philosohper"),
        HumanMessage(content="What is the meaning of life"),
        AIMessage(content="42"),
        HumanMessage(content="What is the meaning of 42"),
    ]

    output = model(messages)
    assert isinstance(
        output, LLMInterface
    ), "Output for list of messages is not LLMInterface"
    openai_completion.assert_called()


@skip_llama_cpp_not_installed
def test_llamacpp_chat():
    """Check LlamaCppChat builds a Llama client and rejects incomplete settings."""
    from llama_cpp import Llama

    vocab_file = str(Path(__file__).parent / "resources" / "ggml-vocab-llama.gguf")

    # A complete configuration yields a llama_cpp.Llama client.
    chat = LlamaCppChat(model_path=vocab_file, chat_format="llama", vocab_only=True)
    assert isinstance(chat.client_object, Llama), "Error initializing llama_cpp.Llama"

    # Leaving out model_path must fail with ValueError.
    with pytest.raises(ValueError):
        chat = LlamaCppChat(chat_format="llama", vocab_only=True)
        chat.client_object

    # Leaving out chat_format must fail with ValueError.
    with pytest.raises(ValueError):
        chat = LlamaCppChat(model_path=vocab_file, vocab_only=True)
        chat.client_object


================================================
FILE: libs/kotaemon/tests/test_llms_completion_models.py
================================================
from pathlib import Path
from unittest.mock import patch

from kotaemon.base.schema import LLMInterface
from kotaemon.llms import AzureOpenAI, LlamaCpp, OpenAI

try:
    from langchain_openai import AzureOpenAI as AzureOpenAILC
    from langchain_openai import OpenAI as OpenAILC
except ImportError:
    from langchain.llms import AzureOpenAI as AzureOpenAILC
    from langchain.llms import OpenAI as OpenAILC

from openai.types.completion import Completion

from .conftest import skip_llama_cpp_not_installed, skip_openai_lc_wrapper_test

# Canned text-completion payload returned by the patched
# `Completions.create` in the tests below (one choice, finish_reason "length").
_openai_completion_response = Completion.parse_obj(
    {
        "id": "cmpl-7qyNoIo6gRSCJR0hi8o3ZKBH4RkJ0",
        "object": "text_completion",
        "created": 1392751226,
        "model": "gpt-35-turbo",
        "system_fingerprint": None,
        "choices": [
            {
                "text": "completion",
                "index": 0,
                "finish_reason": "length",
                "logprobs": None,
            }
        ],
        "usage": {"completion_tokens": 20, "prompt_tokens": 2, "total_tokens": 22},
    }
)


@skip_openai_lc_wrapper_test
@patch(
    "openai.resources.completions.Completions.create",
    side_effect=lambda *args, **kwargs: _openai_completion_response,
)
def test_azureopenai_model(openai_completion):
    """The AzureOpenAI wrapper exposes a Langchain object and returns LLMInterface."""
    settings = {
        "azure_endpoint": "https://test.openai.azure.com/",
        "openai_api_key": "some-key",
        "openai_api_version": "2023-03-15-preview",
        "deployment_name": "gpt35turbo",
        "temperature": 0,
        "request_timeout": 60,
    }
    model = AzureOpenAI(**settings)

    lc_object = model.to_langchain_format()
    assert isinstance(
        lc_object, AzureOpenAILC
    ), "Agent not wrapped in Langchain's AzureOpenAI"

    # A single prompt string must come back as an LLMInterface.
    result = model("hello world")
    assert isinstance(
        result, LLMInterface
    ), "Output for single text is not LLMInterface"


@patch(
    "openai.resources.completions.Completions.create",
    side_effect=lambda *args, **kwargs: _openai_completion_response,
)
def test_openai_model(openai_completion):
    """The OpenAI wrapper exposes a Langchain object and returns LLMInterface."""
    settings = {
        "openai_api_base": "https://test.openai.azure.com/",
        "openai_api_key": "some-key",
        "openai_api_version": "2023-03-15-preview",
        "deployment_name": "gpt35turbo",
        "temperature": 0,
        "request_timeout": 60,
    }
    model = OpenAI(**settings)

    lc_object = model.to_langchain_format()
    assert isinstance(
        lc_object, OpenAILC
    ), "Agent is not wrapped in Langchain's OpenAI"

    # A single prompt string must come back as an LLMInterface.
    result = model("hello world")
    assert isinstance(
        result, LLMInterface
    ), "Output for single text is not LLMInterface"


@skip_llama_cpp_not_installed
def test_llamacpp_model():
    """LlamaCpp wraps an instance of the class reported by `_get_lc_class()`."""
    resources = Path(__file__).parent / "resources"
    vocab_file = resources / "ggml-vocab-llama.gguf"

    # Initialise in vocab-only mode using the bundled vocabulary file.
    llm = LlamaCpp(model_path=str(vocab_file), vocab_only=True)
    wrapped = llm._obj
    assert isinstance(wrapped, llm._get_lc_class())


================================================
FILE: libs/kotaemon/tests/test_mcp_manager.py
================================================
"""Tests for ktem.mcp.manager module.

Uses an in-memory SQLite engine to test MCPManager CRUD operations
without depending on the application's database.
"""

import pytest
from sqlalchemy import JSON, Column, String, create_engine
from sqlalchemy.orm import DeclarativeBase, Session

# ---------------------------------------------------------------------------
# In-memory DB setup (mirrors ktem.mcp.db but fully isolated)
# ---------------------------------------------------------------------------


class _Base(DeclarativeBase):
    """Declarative base private to this test module."""

    pass


class _MCPTable(_Base):
    """Test-local table holding one row per MCP server: a name and a JSON config."""

    __tablename__ = "mcp_table"
    # Server name; doubles as the primary key.
    name = Column(String, primary_key=True, unique=True)
    # Server configuration stored as JSON; an empty dict is the column default.
    config = Column(JSON, default={})


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture()
def manager():
    """Provide a manager bound to a new in-memory SQLite database with the table created."""
    memory_engine = create_engine("sqlite:///:memory:")
    # The table has to exist before the manager's initial load() queries it.
    _MCPTable.metadata.create_all(memory_engine)
    fresh_manager = MCPManagerForTest(memory_engine)
    return fresh_manager


# ---------------------------------------------------------------------------
# Minimal MCPManager that uses the test engine
# ---------------------------------------------------------------------------


class MCPManagerForTest:
    """Same logic as ktem.mcp.manager.MCPManager but uses our test engine."""

    def __init__(self, engine):
        self._engine = engine
        self._info: dict[str, dict] = {}
        self.load()

    def load(self):
        self._info = {}
        with Session(self._engine) as session:
            for item in session.query(_MCPTable).all():
                self._info[item.name] = {  # type: ignore[index]
                    "name": item.name,
                    "config": item.config,
                }

    def info(self) -> dict:
        return self._info

    def get(self, name: str) -> dict | None:
        return self._info.get(name)

    def add(self, name: str, config: dict):
        name = name.strip()
        if not name:
            raise ValueError("Name must not be empty")
        with Session(self._engine) as session:
            session.add(_MCPTable(name=name, config=config))
            session.commit()
        self.load()

    def update(self, name: str, config: dict):
        if not name:
            raise ValueError("Name must not be empty")
        with Session(self._engine) as session:
            item = session.query(_MCPTable).filter_by(name=name).first()
            if not item:
                raise ValueError(f"MCP server '{name}' not found")
            item.config = config  # type: ignore[assignment]
            session.commit()
        self.load()

    def delete(self, name: str):
        with Session(self._engine) as session:
            item = session.query(_MCPTable).filter_by(name=name).first()
            if item:
                session.delete(item)
                session.commit()
        self.load()

    def get_enabled_tools(self) -> list[str]:
        return [
            f"[MCP] {name}"
            for name, entry in self._info.items()
            if entry.get("config", {}).get("enabled_tools") is not None
        ]


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestMCPManagerAdd:
    """Behaviour of add(): persistence, name validation and nested configs."""

    def test_add_and_retrieve(self, manager):
        """add() persists data; get() and info() reflect it."""
        server_config = {"command": "uvx", "args": ["mcp-server-fetch"]}
        manager.add("server1", server_config)

        cached = manager.info()
        assert cached["server1"]["config"]["command"] == "uvx"
        assert manager.get("server1")["name"] == "server1"

    def test_add_multiple(self, manager):
        """Two servers added one after another are both listed."""
        for server_name, command in (("s1", "cmd1"), ("s2", "cmd2")):
            manager.add(server_name, {"command": command})
        assert set(manager.info().keys()) == {"s1", "s2"}

    @pytest.mark.parametrize("name", ["", "   "])
    def test_empty_or_whitespace_name_raises(self, manager, name):
        """Empty and whitespace-only names are rejected."""
        with pytest.raises(ValueError, match="Name must not be empty"):
            manager.add(name, {})

    def test_whitespace_name_is_stripped(self, manager):
        """Surrounding whitespace is dropped from the stored name."""
        padded_name = "  server1  "
        manager.add(padded_name, {"command": "uvx"})
        assert "server1" in manager.info()

    def test_complex_config_stored_correctly(self, manager):
        """Nested dict and list values are returned as they were stored."""
        jira_env = {"JIRA_URL": "https://example.atlassian.net"}
        manager.add(
            "atlassian",
            {"command": "uvx", "env": jira_env, "enabled_tools": ["jira_search"]},
        )
        stored = manager.get("atlassian")["config"]
        assert stored["env"]["JIRA_URL"] == "https://example.atlassian.net"
        assert stored["enabled_tools"] == ["jira_search"]


class TestMCPManagerUpdateDelete:
    """Behaviour of update() and delete()."""

    def test_update_changes_config(self, manager):
        """update() rewrites the targeted entry and leaves the other alone."""
        manager.add("s1", {"command": "cmd1"})
        manager.add("s2", {"command": "cmd2"})

        manager.update("s1", {"command": "updated"})

        commands = {
            server_name: manager.info()[server_name]["config"]["command"]
            for server_name in ("s1", "s2")
        }
        assert commands["s1"] == "updated"
        assert commands["s2"] == "cmd2"  # untouched

    def test_update_nonexistent_raises(self, manager):
        """Updating an unknown server raises ValueError."""
        with pytest.raises(ValueError, match="not found"):
            manager.update("ghost", {})

    def test_delete_removes_entry(self, manager):
        """delete() removes only the named server."""
        for server_name in ("s1", "s2"):
            manager.add(server_name, {})
        manager.delete("s1")
        remaining = manager.info()
        assert "s1" not in remaining
        assert "s2" in remaining

    def test_delete_nonexistent_is_noop(self, manager):
        """Deleting an unknown server neither raises nor adds entries."""
        manager.delete("ghost")  # must not raise
        assert len(manager.info()) == 0


class TestMCPManagerGetEnabledTools:
    """Behaviour of get_enabled_tools()."""

    def test_only_servers_with_enabled_tools_listed(self, manager):
        """Only a server whose config carries `enabled_tools` is offered."""
        unfiltered = {"command": "uvx"}
        filtered = {"command": "uvx", "enabled_tools": ["tool_a"]}
        manager.add("no_filter", unfiltered)
        manager.add("with_filter", filtered)

        choices = manager.get_enabled_tools()

        assert "[MCP] no_filter" not in choices
        assert "[MCP] with_filter" in choices

    def test_empty_when_no_servers(self, manager):
        """With nothing registered the result is an empty list."""
        choices = manager.get_enabled_tools()
        assert choices == []


class TestMCPManagerLoad:
    """Behaviour of load() when the table changes behind the manager's back."""

    def test_load_picks_up_external_db_changes(self, manager):
        """A row written directly to the DB shows up only after load()."""
        manager.add("server1", {})

        # Write a row without going through the manager.
        outside_row = _MCPTable(name="external", config={"command": "ext"})
        with Session(manager._engine) as db:
            db.add(outside_row)
            db.commit()

        assert "external" not in manager.info()  # not yet refreshed
        manager.load()
        assert "external" in manager.info()


================================================
FILE: libs/kotaemon/tests/test_mcp_tools.py
================================================
"""Tests for kotaemon.agents.tools.mcp module.

Covers config parsing, JSON Schema -> Pydantic model building,
tool formatting, and MCPTool construction (without real MCP servers).
"""

from types import SimpleNamespace
from unittest.mock import patch

import pytest

from kotaemon.agents.tools.mcp import (
    MCPTool,
    _json_schema_type_to_python,
    _make_tool,
    build_args_model,
    create_tools_from_config,
    format_tool_list,
    parse_mcp_config,
)

# ---------------------------------------------------------------------------
# _json_schema_type_to_python — parametrized to avoid 7 near-identical tests
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    ("json_type", "expected"),
    [
        ("string", str),
        ("integer", int),
        ("number", float),
        ("boolean", bool),
        ("object", dict),
        ("array", list),
        ("unknown_type", str),  # fallback
    ],
)
def test_json_schema_type_to_python(json_type, expected):
    """Each JSON Schema type name maps to the expected Python type."""
    python_type = _json_schema_type_to_python(json_type)
    assert python_type is expected


# ---------------------------------------------------------------------------
# build_args_model
# ---------------------------------------------------------------------------


class TestBuildArgsModel:
    """Behaviour of build_args_model() for typical and empty schemas."""

    def test_model_fields_and_name(self):
        """Required + optional fields and the generated model name."""
        properties = {
            "url": {"type": "string", "description": "The URL to fetch"},
            "timeout": {"type": "integer", "description": "Timeout in seconds"},
        }
        model = build_args_model(
            "fetch", {"properties": properties, "required": ["url"]}
        )
        fields = model.model_fields

        assert model.__name__ == "MCPArgs_fetch"
        assert fields["url"].is_required()
        assert not fields["timeout"].is_required()

    def test_optional_field_preserves_default(self):
        """A schema-level default is carried onto the generated field."""
        limit_spec = {
            "type": "integer",
            "description": "Max results",
            "default": 10,
        }
        schema = {"properties": {"limit": limit_spec}, "required": []}
        model = build_args_model("search", schema)
        assert model.model_fields["limit"].default == 10

    def test_empty_schema_produces_no_fields(self):
        """An empty schema yields a model without fields."""
        model = build_args_model("empty", {})
        assert len(model.model_fields) == 0


# ---------------------------------------------------------------------------
# parse_mcp_config
# ---------------------------------------------------------------------------


class TestParseMcpConfig:
    """Behaviour of parse_mcp_config(): defaults, command splitting, SSE."""

    def test_full_stdio_config(self):
        """A fully specified stdio config comes back with the same four keys."""
        expected = {
            "transport": "stdio",
            "command": "uvx",
            "args": ["mcp-server-fetch"],
            "env": {"KEY": "value"},
        }
        parsed = parse_mcp_config(
            {
                "transport": "stdio",
                "command": "uvx",
                "args": ["mcp-server-fetch"],
                "env": {"KEY": "value"},
            }
        )
        assert parsed == expected

    def test_defaults_for_empty_config(self):
        """An empty config falls back to stdio with empty command/args/env."""
        parsed = parse_mcp_config({})
        expected_defaults = (
            ("transport", "stdio"),
            ("command", ""),
            ("args", []),
            ("env", {}),
        )
        for key, default in expected_defaults:
            assert parsed[key] == default

    def test_auto_split_multi_word_command(self):
        """stdio with no explicit args: space-delimited command is split."""
        raw_command = "npx -y mcp-remote https://example.com/sse"
        parsed = parse_mcp_config({"command": raw_command})
        assert parsed["command"] == "npx"
        assert parsed["args"] == ["-y", "mcp-remote", "https://example.com/sse"]

    def test_no_split_when_args_already_provided(self):
        """Explicit args suppress the auto-split."""
        raw_command = "npx -y mcp-remote https://example.com/sse"
        parsed = parse_mcp_config({"command": raw_command, "args": ["--flag"]})
        assert parsed["command"] == "npx -y mcp-remote https://example.com/sse"
        assert parsed["args"] == ["--flag"]

    def test_sse_transport_uses_url_as_command(self):
        """For SSE, the url field becomes the effective command."""
        sse_config = {
            "transport": "sse",
            "url": "http://localhost:8080/sse",
            "command": "ignored",
        }
        parsed = parse_mcp_config(sse_config)
        assert parsed["transport"] == "sse"
        assert parsed["command"] == "http://localhost:8080/sse"


# ---------------------------------------------------------------------------
# _make_tool
# ---------------------------------------------------------------------------


class TestMakeTool:
    """Behaviour of _make_tool() with and without schema/description."""

    def test_creates_mcp_tool_with_schema(self):
        """Server settings and tool metadata are copied onto the MCPTool."""
        input_schema = {
            "properties": {"url": {"type": "string", "description": "URL to fetch"}},
            "required": ["url"],
        }
        tool_info = SimpleNamespace(
            name="fetch", description="Fetch a URL", inputSchema=input_schema
        )
        server = {
            "transport": "stdio",
            "command": "uvx",
            "args": ["mcp-server-fetch"],
            "env": {},
        }

        tool = _make_tool(server, tool_info)

        assert isinstance(tool, MCPTool)
        assert tool.name == "fetch"
        assert tool.description == "Fetch a URL"
        assert tool.server_transport == "stdio"
        assert tool.server_command == "uvx"
        assert tool.server_args == ["mcp-server-fetch"]

    def test_missing_schema_and_description_uses_defaults(self):
        """No inputSchema → args_schema is None; None description → auto-generated."""
        server = {"transport": "stdio", "command": "uvx", "args": [], "env": {}}
        bare_info = SimpleNamespace(name="ping", description=None)

        tool = _make_tool(server, bare_info)

        assert tool.description == "MCP tool: ping"
        assert tool.args_schema is None


# ---------------------------------------------------------------------------
# format_tool_list
# ---------------------------------------------------------------------------


class TestFormatToolList:
    """Behaviour of format_tool_list(): counts, icons, truncation, placeholders."""

    def test_all_tools_enabled_by_default(self):
        """Without a filter the output names every tool and says all are enabled."""
        result = format_tool_list(
            [
                {"name": "fetch", "description": "Fetch a URL"},
                {"name": "search", "description": "Search the web"},
            ]
        )
        assert "2" in result
        assert "fetch" in result
        assert "search" in result
        assert "All tools enabled" in result

    def test_partial_filter_shows_counts_and_icons(self):
        """With one of two tools enabled the output shows the ratio and both icons."""
        fetch_tool = {"name": "fetch", "description": "Fetch a URL"}
        search_tool = {"name": "search", "description": "Search the web"}
        result = format_tool_list([fetch_tool, search_tool], enabled_tools=["fetch"])
        assert "1/2 tool(s) enabled" in result
        assert "✅" in result  # fetch enabled
        assert "⬜" in result  # search disabled

    def test_long_description_is_truncated(self):
        """A 200-character description never appears as 121 consecutive characters."""
        long_description = "A" * 200
        result = format_tool_list([{"name": "tool", "description": long_description}])
        assert "A" * 121 not in result

    def test_none_description_shows_placeholder(self):
        """A None description is rendered as a placeholder text."""
        undocumented = {"name": "tool", "description": None}
        result = format_tool_list([undocumented])
        assert "No description" in result


# ---------------------------------------------------------------------------
# create_tools_from_config (mocked MCP server connection)
# ---------------------------------------------------------------------------


class TestCreateToolsFromConfig:
    """Behaviour of create_tools_from_config() with `_run_async` mocked out."""

    def _make_mock_tools(self):
        """Build the two MCPTool objects ("fetch", "search") the mock returns."""
        specs = (("fetch", "Fetch"), ("search", "Search"))
        return [
            MCPTool(
                name=tool_name,
                description=description,
                server_transport="stdio",
                server_command="uvx",
                mcp_tool_name=tool_name,
            )
            for tool_name, description in specs
        ]

    @patch("kotaemon.agents.tools.mcp._run_async")
    def test_no_filter_returns_all(self, mock_run_async):
        """Without `enabled_tools` both tools are returned."""
        mock_run_async.return_value = self._make_mock_tools()
        server_config = {"command": "uvx"}
        tools = create_tools_from_config(server_config)
        assert len(tools) == 2

    @patch("kotaemon.agents.tools.mcp._run_async")
    def test_enabled_tools_filter(self, mock_run_async):
        """Non-empty filter returns only nominated tools; empty list returns all."""
        server_config = {"command": "uvx"}

        mock_run_async.return_value = self._make_mock_tools()
        filtered = create_tools_from_config(server_config, enabled_tools=["fetch"])
        assert len(filtered) == 1
        assert filtered[0].mcp_tool_name == "fetch"

        # Empty list == no filter
        mock_run_async.return_value = self._make_mock_tools()
        all_tools = create_tools_from_config(server_config, enabled_tools=[])
        assert len(all_tools) == 2


# ---------------------------------------------------------------------------
# MCPTool._format_result
# ---------------------------------------------------------------------------


class TestMCPToolFormatResult:
    """Behaviour of MCPTool._format_result() for text, error and binary results."""

    def _make_tool(self):
        """Build a minimal MCPTool on which `_format_result` can be called."""
        return MCPTool(
            name="test",
            description="Test tool",
            server_transport="stdio",
            server_command="echo",
            mcp_tool_name="test",
        )

    def test_text_content_joined(self):
        """Text parts are joined with a newline."""
        parts = [SimpleNamespace(text="Hello"), SimpleNamespace(text="World")]
        response = SimpleNamespace(isError=False, content=parts)
        result = self._make_tool()._format_result(response)
        assert result == "Hello\nWorld"

    def test_error_flag(self):
        """A result flagged as an error is reported as an MCP tool error."""
        response = SimpleNamespace(isError=True, content="Something went wrong")
        result = self._make_tool()._format_result(response)
        assert "MCP Tool Error" in result

    def test_binary_content(self):
        """A part with data and a MIME type is rendered as a binary placeholder."""
        blob = SimpleNamespace(data=b"bytes", mimeType="image/png")
        response = SimpleNamespace(isError=False, content=[blob])
        result = self._make_tool()._format_result(response)
        assert "[Binary data: image/png]" in result


================================================
FILE: libs/kotaemon/tests/test_post_processing.py
================================================
import pytest

from kotaemon.base import Document
from kotaemon.parsers import RegexExtractor


@pytest.fixture
def regex_extractor():
    """Provide a RegexExtractor for digit runs that maps "1"/"2"/"3" to words."""
    digit_words = {"1": "One", "2": "Two", "3": "Three"}
    return RegexExtractor(pattern=r"\d+", output_map=digit_words)


def test_run_document(regex_extractor):
    """A Document input yields mapped matches; `text` holds the first one."""
    source = Document(text="This is a test. 1 2 3")
    results = regex_extractor(source)
    first = results[0]
    assert first.text == "One"
    assert first.matches == ["One", "Two", "Three"]


def test_run_raw(regex_extractor):
    """A plain string input works; "123" has no mapping and is kept as is."""
    results = regex_extractor("This is a test. 123")
    first = results[0]
    assert first.text == "123"
    assert first.matches == ["123"]


def test_run_batch_raw(regex_extractor):
    """A list of strings yields one result per input, in order."""
    results = regex_extractor(["This is a test. 123", "456"])
    texts = []
    matches = []
    for item in results:
        texts.append(item.text)
        matches.append(item.matches)
    assert texts == ["123", "456"]
    assert matches == [["123"], ["456"]]


================================================
FILE: libs/kotaemon/tests/test_prompt.py
================================================
import pytest

from kotaemon.base import Document
from kotaemon.llms import BasePromptComponent, PromptTemplate
from kotaemon.parsers import RegexExtractor


def test_set_attributes():
    """Values given to BasePromptComponent are exposed as attributes."""
    extractor = RegexExtractor(
        pattern=r"\d+", output_map={"1": "One", "2": "Two", "3": "Three"}
    )
    extractor.set_run(kwargs={"text": "This is a test. 1 2 3"}, temp=True)
    document = Document(text="Helloo, Alice!")
    template = PromptTemplate("str = {s}, int = {i}, doc = {doc}, comp = {comp}")

    prompt = BasePromptComponent(
        template=template, s="Alice", i=30, doc=document, comp=extractor
    )

    assert prompt.s == "Alice"
    assert prompt.i == 30
    assert prompt.doc == document
    assert prompt.comp == extractor


def test_check_redundant_kwargs():
    """A key that is not a template placeholder triggers a UserWarning."""
    prompt = BasePromptComponent(template=PromptTemplate("Hello, {name}!"), name="Alice")
    # The method is name-mangled (double underscore), hence the long attribute.
    check = prompt._BasePromptComponent__check_redundant_kwargs
    with pytest.warns(UserWarning, match="Keys provided but not in template: age"):
        check(name="Alice", age=30)


def test_check_unset_placeholders():
    """A placeholder without a value ({age}) raises ValueError."""
    prompt = BasePromptComponent(
        template=PromptTemplate("Hello, {name}! I'm {age} years old."),
        name="Alice",
    )
    # The method is name-mangled (double underscore), hence the long attribute.
    check = prompt._BasePromptComponent__check_unset_placeholders
    with pytest.raises(ValueError):
        check()


def test_validate_value_type():
    """A dict value for a placeholder is rejected with ValueError."""
    prompt = BasePromptComponent(template=PromptTemplate("Hello, {name}!"))
    # The method is name-mangled (double underscore), hence the long attribute.
    validate = prompt._BasePromptComponent__validate_value_type
    with pytest.raises(ValueError):
        validate(name={})


def test_run():
    """Running the prompt fills str, int, Document and component placeholders."""
    extractor = RegexExtractor(
        pattern=r"\d+", output_map={"1": "One", "2": "Two", "3": "Three"}
    )
    extractor.set_run(kwargs={"text": "This is a test. 1 2 3"}, temp=True)
    document = Document(text="Helloo, Alice!")
    template = PromptTemplate("str = {s}, int = {i}, doc = {doc}, comp = {comp}")

    prompt = BasePromptComponent(
        template=template, s="Alice", i=30, doc=document, comp=extractor
    )
    rendered = prompt()

    expected_text = "str = Alice, int = 30, doc = Helloo, Alice!, comp = ['One']"
    assert rendered.text == expected_text


def test_set_method():
    """set_value() stores a placeholder value after construction."""
    prompt = BasePromptComponent(template=PromptTemplate("Hello, {name}!"))
    prompt.set_value(name="Alice")
    assert prompt.name == "Alice"


================================================
FILE: libs/kotaemon/tests/test_promptui.py
================================================
from kotaemon.contribs.promptui.config import export_pipeline_to_config
from kotaemon.contribs.promptui.export import export_from_dict
from kotaemon.contribs.promptui.ui import build_from_dict

from .simple_pipeline import Pipeline


class TestPromptConfig:
    """Exporting a pipeline to a promptui config dictionary."""

    def test_export_prompt_config(self):
        """Test if the prompt config is exported correctly"""
        exported = export_pipeline_to_config(Pipeline())
        config = list(exported.values())[0]

        assert "inputs" in config, "inputs should be in config"
        assert "text" in config["inputs"], "inputs should have config"

        assert "params" in config, "params should be in config"
        expected_params = (
            "llm.deployment_name",
            "llm.azure_endpoint",
            "llm.openai_api_key",
            "llm.openai_api_version",
            "llm.request_timeout",
            "llm.temperature",
        )
        for param_name in expected_params:
            assert param_name in config["params"]


class TestPromptUI:
    """Building the UI from an exported pipeline config."""

    def test_uigeneration(self):
        """Test if the gradio UI is exposed without any problem"""
        exported_config = export_pipeline_to_config(Pipeline())

        # Passing means build_from_dict() returned without raising.
        build_from_dict(exported_config)


class TestExport:
    """Exporting pipeline logs through a YAML config file."""

    def test_export(self, tmp_path):
        """Test if the export functionality works without error"""
        from pathlib import Path

        import yaml
        from theflow.storage import storage

        pipeline = Pipeline()
        # Create the pipeline's result directory up front if it is missing.
        result_dir = Path(storage.url(pipeline.config.store_result))
        result_dir.mkdir(parents=True, exist_ok=True)

        config_dict = export_pipeline_to_config(pipeline)
        pipeline_name = list(config_dict)[0]

        # Describe one log sheet: the "text" input and the "answer" output.
        sheet = {
            "inputs": [{"name": "text", "step": ".", "variable": "text"}],
            "outputs": [{"name": "answer", "step": "."}],
        }
        config_dict[pipeline_name]["logs"] = {"sheet1": sheet}

        config_path = tmp_path / "config.yaml"
        with open(config_path, "w") as stream:
            yaml.safe_dump(config_dict, stream)

        output_path = tmp_path / "exported.xlsx"
        export_from_dict(
            config=str(config_path),
            pipeline=pipeline_name,
            output_path=str(output_path),
        )


================================================
FILE: libs/kotaemon/tests/test_reader.py
================================================
from pathlib import Path
from unittest.mock import patch

from langchain.schema import Document as LangchainDocument
from llama_index.core.node_parser import SimpleNodeParser

from kotaemon.base import Document
from kotaemon.loaders import (
    AutoReader,
    AzureAIDocumentIntelligenceLoader,
    DocxReader,
    HtmlReader,
    MhtmlReader,
    UnstructuredReader,
)

from .conftest import skip_when_unstructured_pdf_not_installed


def test_docx_reader():
    """DocxReader returns at least one document for the sample .docx file."""
    sample = Path(__file__).parent / "resources" / "dummy.docx"
    documents = DocxReader().load_data(sample)

    assert len(documents)


def test_html_reader():
    """HtmlReader returns at least one document for the sample HTML file."""
    sample = Path(__file__).parent / "resources" / "html" / "dummy.html"
    documents = HtmlReader().load_data(sample)

    assert len(documents)


def test_pdf_reader():
    """Read the sample PDF, convert it to Langchain format and chunk it."""
    sample = Path(__file__).parent / "resources" / "dummy.pdf"
    documents = AutoReader("PDFReader").load_data(sample)

    # The sample file produces exactly one document.
    assert len(documents) == 1

    (only_doc,) = documents
    assert isinstance(only_doc, Document)
    normalised = only_doc.text.lower().replace(" ", "")
    assert normalised == "dummypdffile"

    assert isinstance(only_doc.to_langchain_format(), LangchainDocument)

    # Chunk the documents with llama-index's node parser.
    parser = SimpleNodeParser.from_defaults(chunk_size=100, chunk_overlap=20)
    chunks = parser.get_nodes_from_documents(documents)
    assert len(chunks) > 0


@skip_when_unstructured_pdf_not_installed
def test_unstructured_pdf_reader():
    """UnstructuredReader yields one document, with and without splitting."""
    input_path = Path(__file__).parent / "resources/dummy.pdf"
    reader = UnstructuredReader()

    # Default mode: a single document holding the sample text.
    whole = reader.load_data(input_path)
    assert len(whole) == 1

    only_doc = whole[0]
    assert isinstance(only_doc, Document)
    normalised = only_doc.text.lower().replace(" ", "")
    assert normalised == "dummypdffile"

    # Split mode still gives exactly one document for this file.
    split = reader.load_data(input_path, split_documents=True)
    assert len(split) == 1


def test_mhtml_reader():
    """MhtmlReader should return one document starting with the expected text."""
    mhtml_path = Path(__file__).parent / "resources" / "dummy.mhtml"
    docs = MhtmlReader().load_data(mhtml_path)

    assert len(docs) == 1
    only_doc = docs[0]
    assert only_doc.text.startswith("This is a test")


@patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
def test_azureai_document_intelligence_reader(mock_client):
    """Run the Azure loader against a patched DocumentIntelligenceClient."""
    loader = AzureAIDocumentIntelligenceLoader(
        endpoint="https://endpoint.com",
        credential="credential",
    )
    pdf_path = Path(__file__).parent / "resources" / "dummy.pdf"
    docs = loader(pdf_path)

    assert len(docs) == 1
    # the patched client class must have been instantiated exactly once
    mock_client.assert_called_once()


================================================
FILE: libs/kotaemon/tests/test_reranking.py
================================================
from unittest.mock import patch

import pytest
from openai.types.chat.chat_completion import ChatCompletion

from kotaemon.base import Document
from kotaemon.indices.rankings import LLMReranking
from kotaemon.llms import AzureChatOpenAI

# Canned chat-completion payloads, one per mocked `Completions.create` call.
# Only the assistant message content differs between them ("YES"/"NO"/"YES").
# `model_validate` is the pydantic v2 spelling of the deprecated `parse_obj`.
_openai_chat_completion_responses = [
    ChatCompletion.model_validate(
        {
            "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
            "object": "chat.completion",
            "created": 1692338378,
            "model": "gpt-35-turbo",
            "system_fingerprint": None,
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "stop",
                    "message": {
                        "role": "assistant",
                        "content": text,
                        "function_call": None,
                        "tool_calls": None,
                    },
                    "logprobs": None,
                }
            ],
            "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
        }
    )
    for text in [
        "YES",
        "NO",
        "YES",
    ]
]


@pytest.fixture
def llm():
    """Provide an AzureChatOpenAI instance configured with dummy credentials."""
    azure_llm = AzureChatOpenAI(
        api_key="dummy",
        api_version="2024-05-01-preview",
        azure_deployment="gpt-4o",
        azure_endpoint="https://test.openai.azure.com/",
    )
    return azure_llm


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=_openai_chat_completion_responses,
)
def test_reranking(openai_completion, llm):
    """Rerank three documents against the mocked completion responses."""
    query = "test query"
    documents = [Document(text=f"test {idx}") for idx in range(3)]

    # run sequentially so the mocked responses are consumed in order
    reranker = LLMReranking(llm=llm, concurrent=False)
    kept_docs = reranker(documents, query=query)

    assert len(kept_docs) == 2


================================================
FILE: libs/kotaemon/tests/test_splitter.py
================================================
from llama_index.core.schema import NodeRelationship

from kotaemon.base import Document
from kotaemon.indices.splitters import TokenSplitter

# First sample document for the splitter tests. The text is assembled by
# implicit string concatenation, so every fragment that ends mid-sentence must
# end with a space (the "Simon " fragment previously did not, giving "SimonWong").
source1 = Document(
    content="The City Hall and Raffles Place MRT stations are paired cross-platform "
    "interchanges on the North–South line (NSL) and East–West line (EWL) of the "
    "Singapore Mass Rapid Transit (MRT) system. Both are situated in the Downtown "
    "Core district: City Hall station is near landmarks such as the former City Hall, "
    "St Andrew's Cathedral and the Padang, while Raffles Place station serves Merlion "
    "Park, The Fullerton Hotel and the Asian Civilisations Museum. The stations were "
    "first announced in 1982. Constructing the tunnels between the City Hall and "
    "Raffles Place stations required the draining of the Singapore River. The "
    "stations opened on 12 December 1987 as part of the MRT extension to Outram Park "
    "station. Cross-platform transfers between the NSL and EWL began on 28 October "
    "1989, ahead of the split of the MRT network into two lines. Both stations are "
    "designated Civil Defence shelters. City Hall station features a mural by Simon "
    "Wong which depicts government buildings in the area, while two murals at Raffles "
    "Place station by Lim Sew Yong and Thang Kiang How depict scenes of Singapore's "
    "history"
)

# Second sample document for the splitter tests. The text is assembled by
# implicit string concatenation; each fragment ends with a space so words at
# the fragment boundaries stay separated.
source2 = Document(
    content="The pink cockatoo (Cacatua leadbeateri) is a medium-sized cockatoo that "
    "inhabits arid and semi-arid inland areas across Australia, with the exception of "
    "the north east. The bird has a soft-textured white and salmon-pink plumage and "
    "large, bright red and yellow crest. The sexes are quite similar, although males "
    "are usually bigger while the female has a broader yellow stripe on the crest and "
    "develops a red eye when mature. The pink cockatoo is usually found in pairs or "
    "small groups, and feeds both on the ground and in trees. It is listed as an "
    "endangered species by the Australian government. Formerly known as Major "
    "Mitchell's cockatoo, after the explorer Thomas Mitchell, the species was "
    "officially renamed the pink cockatoo in 2023 by BirdLife Australia in light of "
    "Mitchell's involvement in the massacre of Aboriginal people at Mount Dispersion, "
    "as well as a general trend to make Australian species names more culturally "
    "inclusive. This pink cockatoo with a raised crest was photographed near Mount "
    "Grenfell in New South Wales."
)


def test_split_token():
    """Split two documents into token chunks and check the chunk relationships."""
    splitter = TokenSplitter(chunk_size=30, chunk_overlap=10)
    chunks = splitter([source1, source2])

    assert isinstance(chunks, list), "Chunks should be a list"
    assert isinstance(chunks[0], Document), "Chunks should be a list of Documents"

    first_chunk, second_chunk, third_chunk = chunks[0], chunks[1], chunks[2]
    last_chunk = chunks[-1]

    # the first and last chunks point back at their originating documents
    assert first_chunk.relationships[NodeRelationship.SOURCE].node_id == source1.doc_id
    # neighbouring chunks are linked through PREVIOUS / NEXT
    previous_of_second = second_chunk.relationships[NodeRelationship.PREVIOUS]
    assert previous_of_second.node_id == first_chunk.doc_id
    next_of_second = second_chunk.relationships[NodeRelationship.NEXT]
    assert next_of_second.node_id == third_chunk.doc_id
    assert last_chunk.relationships[NodeRelationship.SOURCE].node_id == source2.doc_id


================================================
FILE: libs/kotaemon/tests/test_table_reader.py
================================================
import json
from pathlib import Path

import pytest

from kotaemon.loaders import MathpixPDFReader, OCRReader, PandasExcelReader

from .conftest import skip_when_unstructured_pdf_not_installed

# Sample inputs shipped next to this test module, under "resources".
input_file = Path(__file__).parent.joinpath("resources", "table.pdf")
input_file_excel = Path(__file__).parent.joinpath("resources", "dummy.xlsx")


@pytest.fixture
def fullocr_output():
    """Return the parsed JSON content of the sample full-OCR response file."""
    sample_path = Path(__file__).parent / "resources" / "fullocr_sample_output.json"
    with sample_path.open(encoding="utf-8") as stream:
        return json.load(stream)


@pytest.fixture
def mathpix_output():
    """Return the text of the sample markdown file used as a Mathpix response."""
    sample_path = Path(__file__).parent / "resources" / "policy.md"
    with sample_path.open(encoding="utf-8") as stream:
        return stream.read()


@skip_when_unstructured_pdf_not_installed
def test_ocr_reader(fullocr_output):
    """OCRReader should emit two table documents for the sample OCR response."""
    documents = OCRReader().load_data(input_file, response_content=fullocr_output)

    table_count = sum(
        1 for doc in documents if doc.metadata.get("type", "") == "table"
    )
    assert table_count == 2


def test_mathpix_reader(mathpix_output):
    """MathpixPDFReader should emit four table documents for the sample response."""
    documents = MathpixPDFReader().load_data(
        input_file, response_content=mathpix_output
    )

    table_count = sum(
        1 for doc in documents if doc.metadata.get("type", "") == "table"
    )
    assert table_count == 4


def test_excel_reader():
    """PandasExcelReader should return one document for the sample workbook."""
    documents = PandasExcelReader().load_data(input_file_excel)

    assert len(documents) == 1


================================================
FILE: libs/kotaemon/tests/test_telemetry.py
================================================
import os
import sys

import pytest

from .conftest import skip_when_haystack_not_installed


@pytest.fixture
def clean_artifacts_for_telemetry():
    """Forget the cached kotaemon/haystack modules and the telemetry env flag.

    Missing entries are ignored, so the fixture is safe to run whether or not
    the modules were imported or the variable was set beforehand.
    """
    for module_name in ("kotaemon", "haystack", "haystack.telemetry"):
        sys.modules.pop(module_name, None)

    os.environ.pop("HAYSTACK_TELEMETRY_ENABLED", None)


@pytest.mark.usefixtures("clean_artifacts_for_telemetry")
@skip_when_haystack_not_installed
def test_disable_telemetry_import_haystack_first():
    """Importing kotaemon after haystack must switch haystack telemetry off."""
    import os

    import haystack.telemetry

    env_key = "HAYSTACK_TELEMETRY_ENABLED"

    # state before kotaemon is imported: telemetry object present, flag not "False"
    assert haystack.telemetry.telemetry is not None
    assert os.environ.get(env_key, "True") != "False"

    import kotaemon  # noqa: F401

    # state after kotaemon is imported: telemetry removed, flag forced to "False"
    assert haystack.telemetry.telemetry is None
    assert os.environ.get(env_key, "True") == "False"


@pytest.mark.usefixtures("clean_artifacts_for_telemetry")
@skip_when_haystack_not_installed
def test_disable_telemetry_import_haystack_after_kotaemon():
    """Test that telemetry is disabled when kotaemon is imported before haystack"""
    import os

    # The import order is what this test is about: kotaemon has to be loaded
    # first and haystack afterwards. isort is switched off for these two lines
    # so it does not move the third-party import back above the first-party one.
    # isort: off
    import kotaemon  # noqa: F401

    import haystack.telemetry

    # isort: on

    assert haystack.telemetry.telemetry is None
    assert os.environ.get("HAYSTACK_TELEMETRY_ENABLED", "True") == "False"


================================================
FILE: libs/kotaemon/tests/test_template.py
================================================
import pytest

from kotaemon.llms import PromptTemplate


def test_prompt_template_creation():
    """A PromptTemplate keeps its source string and exposes its placeholders."""
    plain_text = "This is a template"
    assert PromptTemplate(plain_text).template == plain_text

    with_fields = "Hello, {name}! Today is {day}."
    template = PromptTemplate(with_fields)
    assert template.template == with_fields
    assert template.placeholders == {"name", "day"}


def test_prompt_template_creation_invalid_placeholder():
    """An invalid placeholder raises, or only warns when ignore_invalid is set."""
    bad_template = "Hello, {name}! Today is {0day}."

    # strict mode: the invalid placeholder is an error
    with pytest.raises(ValueError):
        PromptTemplate(bad_template, ignore_invalid=False)

    # lenient mode: the invalid placeholder is reported through a warning
    with pytest.warns(UserWarning, match="Ignore invalid placeholder: 0day."):
        PromptTemplate(bad_template, ignore_invalid=True)


def test_prompt_template_addition():
    """Adding two templates joins their strings with a newline."""
    combined = PromptTemplate("Hello, ") + PromptTemplate("world!")
    assert combined.template == "Hello, \nworld!"

    greeting = PromptTemplate("Hello, {name}!")
    date_line = PromptTemplate("Today is {day}.")
    combined = greeting + date_line
    assert combined.template == "Hello, {name}!\nToday is {day}."


def test_prompt_template_extract_placeholders():
    """The placeholders attribute lists every field named in the template."""
    template = PromptTemplate("Hello, {name}! Today is {day}.")
    assert template.placeholders == {"name", "day"}


def test_prompt_template_populate():
    """populate() substitutes every placeholder with the given value."""
    template = PromptTemplate("Hello, {name}! Today is {day}.")
    filled = template.populate(name="John", day="Monday")
    assert filled == "Hello, John! Today is Monday."


def test_prompt_template_check_missing_kwargs():
    """check_missing_kwargs() and populate() reject an incomplete set of values."""
    template = PromptTemplate("Hello, {name}! Today is {day}.")
    incomplete = {"name": "John"}  # "day" is deliberately left out

    with pytest.raises(ValueError):
        template.check_missing_kwargs(**incomplete)

    with pytest.raises(ValueError):
        template.populate(**incomplete)


def test_prompt_template_check_redundant_kwargs():
    """Keys that are not in the template trigger a warning in all three methods."""
    template = PromptTemplate("Hello, {name}! Today is {day}.")
    with_extra = {"name": "John", "day": "Monday", "age": "30"}  # "age" is unused

    with pytest.warns(UserWarning, match="Keys provided but not in template: age"):
        template.check_redundant_kwargs(**with_extra)

    with pytest.warns(UserWarning, match="Keys provided but not in template: age"):
        template.partial_populate(**with_extra)

    with pytest.warns(UserWarning, match="Keys provided but not in template: age"):
        template.populate(**with_extra)


def test_prompt_template_populate_complex_template():
    """populate() must agree with str.format on format specs and conversions."""
    template_string = (
        "a = {a:.2f}, b = {b}, c = {c:.1%}, d = {d:#.0g}, ascii of {e} = {e!a:>2}"
    )
    values = {"a": 1, "b": "two", "c": 3, "d": 4, "e": "á"}

    via_template = PromptTemplate(template_string).populate(**values)
    via_builtin = template_string.format(**values)
    assert via_template == via_builtin


def test_prompt_template_partial_populate():
    """partial_populate() fills the given keys and leaves the others untouched."""
    template_string = (
        "a = {a:.2f}, b = {b}, c = {c:.1%}, d = {d:#.0g}, ascii of {e} = {e!a:>2}"
    )
    # "c" is not supplied, so its placeholder must survive verbatim
    values = {"a": 1, "b": "two", "d": 4, "e": "á"}

    partially_filled = PromptTemplate(template_string).partial_populate(**values)
    assert (
        partially_filled
        == "a = 1.00, b = two, c = {c:.1%}, d = 4., ascii of á = '\\xe1'"
    )


================================================
FILE: libs/kotaemon/tests/test_tools.py
================================================
import json
from pathlib import Path
from unittest.mock import patch

from openai.types.create_embedding_response import CreateEmbeddingResponse

from kotaemon.agents.tools import ComponentTool, GoogleSearchTool, WikipediaTool
from kotaemon.base import Document
from kotaemon.embeddings import AzureOpenAIEmbeddings
from kotaemon.indices.vectorindex import VectorIndexing, VectorRetrieval
from kotaemon.storages import ChromaVectorStore, InMemoryDocumentStore

# Canned embedding response returned by the patched OpenAI client below.
# The encoding is given explicitly so the JSON fixture is decoded the same way
# regardless of the platform's default locale encoding.
with open(
    Path(__file__).parent / "resources" / "embedding_openai.json",
    encoding="utf-8",
) as f:
    openai_embedding = CreateEmbeddingResponse.model_validate(json.load(f))


def test_google_tool(mock_google_search):
    """GoogleSearchTool exposes a name and description and answers a query."""
    search_tool = GoogleSearchTool()
    assert search_tool.name
    assert search_tool.description

    answer = search_tool("What is Cinnamon AI")
    assert answer


def test_wikipedia_tool():
    """WikipediaTool exposes a name and description and answers a query."""
    # NOTE(review): no mock fixture is requested here, so this presumably
    # reaches the real Wikipedia backend - confirm.
    wiki_tool = WikipediaTool()
    assert wiki_tool.name
    assert wiki_tool.description

    answer = wiki_tool("Cinnamon")
    assert answer


@patch(
    "openai.resources.embeddings.Embeddings.create",
    side_effect=lambda *args, **kwargs: openai_embedding,
)
def test_pipeline_tool(openai_embedding_call, tmp_path):
    """Index a document and retrieve it again through ComponentTool wrappers.

    Args:
        openai_embedding_call: mock injected by the ``@patch`` decorator. It has
            to be declared explicitly: ``patch`` passes the mock as the first
            positional argument, so without this parameter the mock would be
            bound to ``tmp_path`` and the pytest fixture would never be used.
        tmp_path: pytest temporary directory holding the Chroma storage.
    """
    db = ChromaVectorStore(path=str(tmp_path))
    doc_store = InMemoryDocumentStore()
    embedding = AzureOpenAIEmbeddings(
        azure_deployment="embedding-deployment",
        azure_endpoint="https://test.openai.azure.com/",
        api_key="some-key",
        api_version="version",
    )

    index_pipeline = VectorIndexing(
        vector_store=db, embedding=embedding, doc_store=doc_store
    )
    retrieval_pipeline = VectorRetrieval(
        vector_store=db, doc_store=doc_store, embedding=embedding
    )

    index_tool = ComponentTool(
        name="index_document",
        description="A tool to use to index a document to be searched later",
        component=index_pipeline,
    )
    # the indexing result is not inspected; only the retrieval output is checked
    index_tool({"text": Document(text="Cinnamon AI")})

    retrieval_tool = ComponentTool(
        name="search_document",
        description="A tool to use to search a document in a vectorstore",
        component=retrieval_pipeline,
    )
    output = retrieval_tool("Cinnamon AI")
    assert output


================================================
FILE: libs/kotaemon/tests/test_vectorstore.py
================================================
import json
import os

import pytest

from kotaemon.base import DocumentWithEmbedding
from kotaemon.storages import (
    ChromaVectorStore,
    InMemoryVectorStore,
    MilvusVectorStore,
    QdrantVectorStore,
    SimpleFileVectorStore,
)


class TestChromaVectorStore:
    """Tests for ChromaVectorStore using a storage path under ``tmp_path``."""

    def test_add(self, tmp_path):
        """Test that the DB add correctly"""
        db = ChromaVectorStore(path=str(tmp_path))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        ids = ["1", "2"]

        assert db._collection.count() == 0, "Expected empty collection"
        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert output == ids, "Expected output to be the same as ids"
        assert db._collection.count() == 2, "Expected 2 added entries"

    def test_add_from_docs(self, tmp_path):
        """Test that documents carrying embeddings can be added directly"""
        db = ChromaVectorStore(path=str(tmp_path))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        documents = [
            DocumentWithEmbedding(embedding=embedding, metadata=metadata)
            for embedding, metadata in zip(embeddings, metadatas)
        ]
        assert db._collection.count() == 0, "Expected empty collection"
        output = db.add(documents)
        assert len(output) == 2, "Expected outputting 2 ids"
        assert db._collection.count() == 2, "Expected 2 added entries"

    def test_delete(self, tmp_path):
        """Test that entries are removed by id"""
        db = ChromaVectorStore(path=str(tmp_path))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["a", "b", "c"]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert db._collection.count() == 3, "Expected 3 added entries"
        db.delete(ids=["a", "b"])
        assert db._collection.count() == 1, "Expected 1 remaining entry"
        db.delete(ids=["c"])
        assert db._collection.count() == 0, "Expected 0 remaining entry"

    def test_query(self, tmp_path):
        """Test that the closest stored embedding is returned for a query"""
        db = ChromaVectorStore(path=str(tmp_path))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["a", "b", "c"]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)

        _, sim, out_ids = db.query(embedding=[0.1, 0.2, 0.3], top_k=1)
        # abs() makes this a two-sided tolerance check; without it any score
        # below 1.0 would satisfy the assertion.
        assert abs(sim[0] - 1.0) < 1e-6
        assert out_ids == ["a"]

        _, _, out_ids = db.query(embedding=[0.42, 0.52, 0.53], top_k=1)
        assert out_ids == ["b"]

    def test_save_load_delete(self, tmp_path):
        """Test that save/load func behave correctly."""
        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["1", "2", "3"]
        db = ChromaVectorStore(path=str(tmp_path))
        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)

        # a second store opened on the same path must see the saved entries
        db2 = ChromaVectorStore(path=str(tmp_path))
        assert (
            db2._collection.count() == 3
        ), "load function does not load data completely"

        # test delete collection function
        db2.drop()
        # reinit the chroma with the same collection name
        db2 = ChromaVectorStore(path=str(tmp_path))
        assert (
            db2._collection.count() == 0
        ), "delete collection function does not work correctly"


class TestInMemoryVectorStore:
    """Tests for InMemoryVectorStore, including its JSON save/load round trip."""

    def test_add(self):
        """Test that add func adds correctly."""

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        ids = ["1", "2"]
        db = InMemoryVectorStore()

        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert output == ids, "Expected output to be the same as ids"

    def test_save_load_delete(self, tmp_path):
        """Test that delete, save and load func behave correctly."""
        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["1", "2", "3"]
        db = InMemoryVectorStore()
        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        db.delete(["3"])
        db.save(save_path=tmp_path / "test_save_load_delete.json")
        with open(tmp_path / "test_save_load_delete.json") as f:
            data = json.load(f)
        saved_ids = data["text_id_to_ref_doc_id"]
        # Each id is tested on its own: `"1" and "2" in x` evaluates to
        # `"2" in x`, which silently skipped the check for "1".
        assert (
            "1" in saved_ids and "2" in saved_ids
        ), "save function does not save data completely"
        assert (
            "3" not in saved_ids
        ), "delete function does not delete data completely"
        db2 = InMemoryVectorStore()
        db2.load(load_path=tmp_path / "test_save_load_delete.json")
        assert db2.get("2") == [
            0.4,
            0.5,
            0.6,
        ], "load function does not load data completely"


class TestSimpleFileVectorStore:
    """Tests for SimpleFileVectorStore, which stores its collection as a file."""

    def test_add_delete(self, tmp_path):
        """Test that add and delete are reflected in the collection file."""
        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["1", "2", "3"]
        collection_name = "test_save_load_delete"
        db = SimpleFileVectorStore(path=tmp_path, collection_name=collection_name)
        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        db.delete(["3"])
        with open(tmp_path / collection_name) as f:
            data = json.load(f)
        saved_ids = data["text_id_to_ref_doc_id"]
        # Each id is tested on its own: `"1" and "2" in x` evaluates to
        # `"2" in x`, which silently skipped the check for "1".
        assert (
            "1" in saved_ids and "2" in saved_ids
        ), "save function does not save data completely"
        assert (
            "3" not in saved_ids
        ), "delete function does not delete data completely"
        # a second store on the same path and collection must load the data
        db2 = SimpleFileVectorStore(path=tmp_path, collection_name=collection_name)
        assert db2.get("2") == [
            0.4,
            0.5,
            0.6,
        ], "load function does not load data completely"

        os.remove(tmp_path / collection_name)


class TestMilvusVectorStore:
    """Tests for MilvusVectorStore using a storage path under ``tmp_path``."""

    def test_add(self, tmp_path):
        """Test that the DB add correctly"""
        db = MilvusVectorStore(
            path=str(tmp_path),
            overwrite=True,
        )

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        ids = ["1", "2"]

        assert db.count() == 0, "Expected empty collection"
        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert output == ids, "Expected output to be the same as ids"
        assert db.count() == 2, "Expected 2 added entries"

    def test_add_from_docs(self, tmp_path):
        """Test that documents carrying embeddings can be added directly"""
        db = MilvusVectorStore(
            path=str(tmp_path),
            overwrite=True,
        )

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        documents = [
            DocumentWithEmbedding(embedding=embedding, metadata=metadata)
            for embedding, metadata in zip(embeddings, metadatas)
        ]
        assert db.count() == 0, "Expected empty collection"
        output = db.add(documents)
        assert len(output) == 2, "Expected outputting 2 ids"
        assert db.count() == 2, "Expected 2 added entries"

    def test_delete(self, tmp_path):
        """Test that entries are removed by id"""
        db = MilvusVectorStore(
            path=str(tmp_path),
            overwrite=True,
        )

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["a", "b", "c"]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert db.count() == 3, "Expected 3 added entries"
        db.delete(ids=["a", "b"])
        assert db.count() == 1, "Expected 1 remaining entry"
        db.delete(ids=["c"])
        assert db.count() == 0, "Expected 0 remaining entry"

    def test_query(self, tmp_path):
        """Test that the closest stored embedding is returned for a query"""
        db = MilvusVectorStore(path=str(tmp_path), overwrite=True)
        import numpy as np

        # the embeddings are scaled to unit length before being stored
        embeddings = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]])
        norms = np.linalg.norm(embeddings, axis=1)
        normalized_embeddings = (embeddings / norms[:, np.newaxis]).tolist()

        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["a", "b", "c"]

        db.add(embeddings=normalized_embeddings, metadatas=metadatas, ids=ids)

        _, sim, out_ids = db.query(embedding=normalized_embeddings[0], top_k=1)
        # abs() makes this a two-sided tolerance check; without it any score
        # below 1.0 would satisfy the assertion.
        assert abs(sim[0] - 1.0) < 1e-6
        assert out_ids == ["a"]

        # a slightly shifted copy of the second embedding must still match "b"
        query_embedding = [
            normalized_embeddings[1][0] + 0.02,
            normalized_embeddings[1][1] + 0.02,
            normalized_embeddings[1][2] + 0.02,
        ]
        _, _, out_ids = db.query(embedding=query_embedding, top_k=1)
        assert out_ids == ["b"]

    def test_save_load_delete(self, tmp_path):
        """Test that save/load func behave correctly."""
        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = ["1", "2", "3"]
        db = MilvusVectorStore(path=str(tmp_path), overwrite=True)
        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)

        # `overwrite` (not `overrides`) is the keyword used by every other
        # construction in this class; the store must be reopened without
        # overwriting so the saved entries are still there.
        db2 = MilvusVectorStore(path=str(tmp_path), overwrite=False)
        assert db2.count() == 3, "load function does not load data completely"

        # test delete collection function
        db2.drop()
        # reinit the milvus with the same collection name
        db2 = MilvusVectorStore(path=str(tmp_path), overwrite=False)
        assert db2.count() == 0, "delete collection function does not work correctly"


class TestQdrantVectorStore:
    """Tests for QdrantVectorStore using in-memory and on-disk Qdrant clients."""

    def test_add(self):
        """Test that the DB add correctly"""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
        ]

        output = db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert output == ids, "Expected output to be the same as ids"
        assert db.count() == 2, "Expected 2 added entries"

    def test_add_from_docs(self, tmp_path):
        """Test that documents carrying embeddings can be added directly"""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
        documents = [
            DocumentWithEmbedding(embedding=embedding, metadata=metadata)
            for embedding, metadata in zip(embeddings, metadatas)
        ]

        output = db.add(documents)
        assert len(output) == 2, "Expected outputting 2 ids"
        assert db.count() == 2, "Expected 2 added entries"

    def test_delete(self, tmp_path):
        """Test that entries are removed by id"""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            "6bed07c3-d284-47a3-a711-c3f9186755b8",
        ]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        assert db.count() == 3, "Expected 3 added entries"
        db.delete(
            ids=[
                "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
                "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            ]
        )
        assert db.count() == 1, "Expected 1 remaining entry"
        db.delete(ids=["6bed07c3-d284-47a3-a711-c3f9186755b8"])
        assert db.count() == 0, "Expected 0 remaining entry"

    def test_query(self, tmp_path):
        """Test that the closest stored embedding is returned for a query"""
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(collection_name="test", client=QdrantClient(":memory:"))

        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            "6bed07c3-d284-47a3-a711-c3f9186755b8",
        ]

        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)

        _, sim, out_ids = db.query(embedding=[0.1, 0.2, 0.3], top_k=1)
        # abs() makes this a two-sided tolerance check; without it any score
        # below 1.0 would satisfy the assertion.
        assert abs(sim[0] - 1.0) < 1e-6
        assert out_ids == ["0f0611b3-2d9c-4818-ab69-1f1c4cf66693"]

        _, _, out_ids = db.query(embedding=[0.4, 0.5, 0.6], top_k=1)
        assert out_ids == ["90aba5d3-f4f8-47c6-bad9-5ea457442e07"]

    def test_save_load_delete(self, tmp_path):
        """Test that save/load func behave correctly."""
        embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
        metadatas = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 6}]
        ids = [
            "0f0611b3-2d9c-4818-ab69-1f1c4cf66693",
            "90aba5d3-f4f8-47c6-bad9-5ea457442e07",
            "6bed07c3-d284-47a3-a711-c3f9186755b8",
        ]
        from qdrant_client import QdrantClient

        db = QdrantVectorStore(
            collection_name="test", client=QdrantClient(path=tmp_path)
        )
        db.add(embeddings=embeddings, metadatas=metadatas, ids=ids)
        # drop the reference before a second client is opened on the same path
        del db

        db2 = QdrantVectorStore(
            collection_name="test", client=QdrantClient(path=tmp_path)
        )
        assert db2.count() == 3

        db2.drop()
        del db2

        db2 = QdrantVectorStore(
            collection_name="test", client=QdrantClient(path=tmp_path)
        )

        with pytest.raises(Exception):
            # Since no docs were added, the collection should not exist yet
            # and thus the count function should raise an exception
            db2.count()


================================================
FILE: libs/ktem/.gitignore
================================================
14-1_抜粋-1.pdf
_example_.db
ktem/assets/prebuilt/


================================================
FILE: libs/ktem/MANIFEST.in
================================================
include ktem/assets/css/*.css
include ktem/assets/img/*.svg
include ktem/assets/js/*.js
include ktem/assets/md/*.md


================================================
FILE: libs/ktem/alembic.ini
================================================
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = migrations

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to migrations/versions.  When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

sqlalchemy.url = driver://user:pass@localhost/dbname


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts.  See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S


================================================
FILE: libs/ktem/ktem/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/app.py
================================================
import os
from pathlib import Path
from typing import Optional

import gradio as gr
import pluggy
from ktem import extension_protocol
from ktem.assets import PDFJS_PREBUILT_DIR, KotaemonTheme
from ktem.components import reasonings
from ktem.exceptions import HookAlreadyDeclared, HookNotDeclared
from ktem.index import IndexManager
from ktem.settings import BaseSettingGroup, SettingGroup, SettingReasoningGroup
from theflow.settings import settings
from theflow.utils.modules import import_dotted_string

BASE_PATH = os.environ.get("GR_FILE_ROOT_PATH", "")


class BaseApp:
    """The main app of Kotaemon

    The main application contains app-level information:
        - setting state
        - dynamic conversation state
        - user id

    Also contains registering methods for:
        - reasoning pipelines
        - indexing & retrieval pipelines

    App life-cycle:
        - Render
        - Declare public events
        - Subscribe public events
        - Register events
    """

    public_events: list[str] = []

    def __init__(self):
        """Load settings and bundled assets, then register extensions,
        reasonings and indices and build the default setting state"""
        self.dev_mode = getattr(settings, "KH_MODE", "") == "dev"
        self.app_name = getattr(settings, "KH_APP_NAME", "Kotaemon")
        self.app_version = getattr(settings, "KH_APP_VERSION", "")
        self.f_user_management = getattr(settings, "KH_FEATURE_USER_MANAGEMENT", False)
        self._theme = KotaemonTheme()

        dir_assets = Path(__file__).parent / "assets"
        dir_js = dir_assets / "js"

        # Read every bundled asset explicitly as UTF-8 so the result does not
        # depend on the platform default encoding (e.g. cp1252 on Windows).
        self._css = (dir_assets / "css" / "main.css").read_text(encoding="utf-8")
        self._js = (
            (dir_js / "main.js")
            .read_text(encoding="utf-8")
            .replace("KH_APP_VERSION", self.app_version)
        )

        # workaround for Windows path: backslashes must be escaped because the
        # path is substituted into JavaScript source
        pdf_js_dist_dir = str(PDFJS_PREBUILT_DIR).replace("\\", "\\\\")
        self._pdf_view_js = (
            (dir_js / "pdf_viewer.js")
            .read_text(encoding="utf-8")
            .replace("PDFJS_PREBUILT_DIR", pdf_js_dist_dir)
            .replace("GR_FILE_ROOT_PATH", BASE_PATH)
        )
        self._svg_js = (dir_js / "svg-pan-zoom.min.js").read_text(encoding="utf-8")

        self._favicon = str(dir_assets / "img" / "favicon.svg")

        self.default_settings = SettingGroup(
            application=BaseSettingGroup(settings=settings.SETTINGS_APP),
            reasoning=SettingReasoningGroup(settings=settings.SETTINGS_REASONING),
        )

        self._callbacks: dict[str, list] = {}
        self._events: dict[str, list] = {}

        self.register_extensions()
        self.register_reasonings()
        self.initialize_indices()

        self.default_settings.reasoning.finalize()
        self.default_settings.index.finalize()
        self.settings_state = gr.State(self.default_settings.flatten())

        # without user management every visitor shares the "default" user
        self.user_id = gr.State("default" if not self.f_user_management else None)

    def initialize_indices(self):
        """Create the index manager, start indices, and register to app settings"""
        self.index_manager = IndexManager(self)
        self.index_manager.on_application_startup()

        for index in self.index_manager.indices:
            options = index.get_user_settings()
            self.default_settings.index.options[index.id] = BaseSettingGroup(
                settings=options
            )

    def register_reasonings(self):
        """Register the reasoning components from app settings

        Does nothing when `KH_REASONINGS` is absent from the settings or None.
        """
        if getattr(settings, "KH_REASONINGS", None) is None:
            return

        for value in settings.KH_REASONINGS:
            reasoning_cls = import_dotted_string(value, safe=False)
            rid = reasoning_cls.get_info()["id"]
            reasonings[rid] = reasoning_cls
            options = reasoning_cls().get_user_settings()
            self.default_settings.reasoning.options[rid] = BaseSettingGroup(
                settings=options
            )

    def register_extensions(self):
        """Register installed extensions"""
        self.exman = pluggy.PluginManager("ktem")
        self.exman.add_hookspecs(extension_protocol)
        self.exman.load_setuptools_entrypoints("ktem")

        # retrieve and register extension declarations
        extension_declarations = self.exman.hook.ktem_declare_extensions()
        for extension_declaration in extension_declarations:
            # if already in database, with the same version: skip

            # otherwise,
            # remove the old information from the database if it exists
            # store the information into the database

            functionality = extension_declaration["functionality"]

            # update the reasoning information
            if "reasoning" in functionality:
                for rid, rdec in functionality["reasoning"].items():
                    # namespace the reasoning id with the extension id
                    unique_rid = f"{extension_declaration['id']}/{rid}"
                    self.default_settings.reasoning.options[
                        unique_rid
                    ] = BaseSettingGroup(
                        settings=rdec["settings"],
                    )

    def declare_event(self, name: str):
        """Declare a public gradio event for other components to subscribe to

        Args:
            name: The name of the event

        Raises:
            HookAlreadyDeclared: if an event with this name was already declared
        """
        if name in self._events:
            raise HookAlreadyDeclared(f"Hook {name} is already declared")
        self._events[name] = []

    def subscribe_event(self, name: str, definition: dict):
        """Subscribe to a previously declared public event

        Args:
            name: The name of the event
            definition: The definition appended to the event's subscriber list

        Raises:
            HookNotDeclared: if no event with this name has been declared
        """
        if name not in self._events:
            raise HookNotDeclared(f"Hook {name} is not declared")
        self._events[name].append(definition)

    def get_event(self, name) -> list[dict]:
        """Return the definitions subscribed to a declared public event

        Args:
            name: The name of the event

        Raises:
            HookNotDeclared: if no event with this name has been declared
        """
        if name not in self._events:
            raise HookNotDeclared(f"Hook {name} is not declared")

        return self._events[name]

    def ui(self):
        """Build the UI of the app; must be implemented by subclasses"""
        raise NotImplementedError

    def on_subscribe_public_events(self):
        """Subscribe to the declared public event of the app"""

    def on_register_events(self):
        """Register all events to the app"""

    def _on_app_created(self):
        """Called when the app is created"""

    @staticmethod
    def _external_head_html() -> str:
        """Return the HTML injected into the page <head>

        Every <script> element is closed before the next one starts: an
        external script (one with `src`) ignores its inline content, so the
        `window.markmap` options must live in their own <script> element,
        placed before the markmap autoloader script.
        """
        markmap_js = """
        <script>
            window.markmap = {
                /** @type AutoLoaderOptions */
                autoLoader: {
                    toolbar: true, // Enable toolbar
                },
            };
        </script>
        """
        return (
            "<script type='module' "
            "src='https://cdn.skypack.dev/pdfjs-viewer-element'>"
            "</script>"
            "<script type='module' "
            "src='https://cdnjs.cloudflare.com/ajax/libs/tributejs/5.1.3/tribute.min.js'>"  # noqa
            "</script>"
            f"{markmap_js}"
            "<script src='https://cdn.jsdelivr.net/npm/markmap-autoloader@0.16'></script>"  # noqa
            "<script src='https://cdn.jsdelivr.net/npm/minisearch@7.1.1/dist/umd/index.min.js'></script>"  # noqa
            "<link rel='stylesheet' href='https://cdnjs.cloudflare.com/ajax/libs/tributejs/5.1.3/tribute.css'/>"  # noqa
        )

    def make(self):
        """Build and return the gradio Blocks of the app

        Renders the shared states and the UI, then runs the life-cycle steps
        in order: declare public events, subscribe to public events, register
        events and run the app-created callbacks.
        """
        with gr.Blocks(
            theme=self._theme,
            css=self._css,
            title=self.app_name,
            analytics_enabled=False,
            js=self._js,
            head=self._external_head_html(),
        ) as demo:
            self.app = demo
            self.settings_state.render()
            self.user_id.render()

            self.ui()

            self.declare_public_events()
            self.subscribe_public_events()
            self.register_events()
            self.on_app_created()

            # run the PDF viewer script when the page loads
            demo.load(None, None, None, js=self._pdf_view_js)

        return demo

    def declare_public_events(self):
        """Declare the app's public events, then those of its `BasePage` attributes"""
        for event in self.public_events:
            self.declare_event(event)

        for value in self.__dict__.values():
            if isinstance(value, BasePage):
                value.declare_public_events()

    def subscribe_public_events(self):
        """Subscribe to public events, for the app and its `BasePage` attributes"""
        self.on_subscribe_public_events()
        for value in self.__dict__.values():
            if isinstance(value, BasePage):
                value.subscribe_public_events()

    def register_events(self):
        """Register all events, for the app and its `BasePage` attributes"""
        self.on_register_events()
        for value in self.__dict__.values():
            if isinstance(value, BasePage):
                value.register_events()

    def on_app_created(self):
        """Execute on app created callbacks, for the app and its `BasePage` attributes"""
        self._on_app_created()
        for value in self.__dict__.values():
            if isinstance(value, BasePage):
                value.on_app_created()


class BasePage:
    """Base class for a page of the Kotaemon app

    A page keeps a reference to the app it belongs to and forwards every
    life-cycle step to the `BasePage` instances stored as its attributes.
    """

    public_events: list[str] = []

    def __init__(self, app):
        self._app = app

    def on_building_ui(self):
        """Hook for building the UI of the page"""

    def on_subscribe_public_events(self):
        """Hook for subscribing to the declared public events of the app"""

    def on_register_events(self):
        """Hook for registering the events of the page"""

    def _on_app_created(self):
        """Hook called once the app is created"""

    def as_gradio_component(
        self,
    ) -> Optional[gr.components.Component | list[gr.components.Component]]:
        """Return the gradio components responsible for events

        Note: in ideal scenario, this method shouldn't be necessary.
        """
        return None

    def _child_pages(self):
        """Lazily yield each instance attribute that is itself a `BasePage`"""
        for attribute in self.__dict__.values():
            if isinstance(attribute, BasePage):
                yield attribute

    def render(self):
        """Render every gradio block and nested page held as an attribute"""
        for attribute in self.__dict__.values():
            for kind in (gr.blocks.Block, BasePage):
                if isinstance(attribute, kind):
                    attribute.render()

    def unrender(self):
        """Unrender every gradio block and nested page held as an attribute"""
        for attribute in self.__dict__.values():
            for kind in (gr.blocks.Block, BasePage):
                if isinstance(attribute, kind):
                    attribute.unrender()

    def declare_public_events(self):
        """Declare this page's public events on the app, then recurse into nested pages"""
        for event_name in self.public_events:
            self._app.declare_event(event_name)

        for page in self._child_pages():
            page.declare_public_events()

    def subscribe_public_events(self):
        """Run the subscription hook, then recurse into nested pages"""
        self.on_subscribe_public_events()
        for page in self._child_pages():
            page.subscribe_public_events()

    def register_events(self):
        """Run the event registration hook, then recurse into nested pages"""
        self.on_register_events()
        for page in self._child_pages():
            page.register_events()

    def on_app_created(self):
        """Run the app-created hook, then recurse into nested pages"""
        self._on_app_created()
        for page in self._child_pages():
            page.on_app_created()


================================================
FILE: libs/ktem/ktem/assets/__init__.py
================================================
from pathlib import Path

from decouple import config

from .theme import Kotaemon as KotaemonTheme

# Folder name of the bundled PDF.js distribution (overridable via configuration)
PDFJS_VERSION_DIST: str = config("PDFJS_VERSION_DIST", "pdfjs-4.0.379-dist")
# `cast=Path` keeps the value a `Path`, as annotated, even when it is overridden
# through configuration (an overriding value would otherwise arrive as `str`).
PDFJS_PREBUILT_DIR: Path = config(
    "PDFJS_PREBUILT_DIR",
    default=Path(__file__).parent / "prebuilt" / PDFJS_VERSION_DIST,
    cast=Path,
)

__all__ = ["KotaemonTheme", "PDFJS_VERSION_DIST", "PDFJS_PREBUILT_DIR"]


================================================
FILE: libs/ktem/ktem/assets/css/main.css
================================================
:root {
  --main-area-height: calc(100vh - 110px);
}

/* no footer */
footer {
  display: none !important;
}

/* customize scrollbar */
::-webkit-scrollbar {
  background: var(--background-fill-primary);
}
::-webkit-scrollbar-thumb {
  background-color: var(--border-color-primary);
  border: 4px solid transparent;
  border-radius: 100px;
  background-clip: content-box;
}
::-webkit-scrollbar-corner {
  background: var(--background-fill-primary);
}

.gradio-container {
  max-width: 100% !important;
  /* overflow: scroll !important;
  height: 100% !important; */
}

/* styling for header bar */
.header-bar {
  background-color: transparent;
  margin: 0px 0px 20px;
  overflow-x: scroll;
  display: block !important;
  /* keep the tabs on a single line; `white-space` is the fallback for
     browsers that do not support the newer `text-wrap` property */
  white-space: nowrap;
  text-wrap: nowrap;
  border: none;
}
.header-bar button.selected {
  border: none;

  /* an alternative header bar style with rounded background */
  /* background-color: var(--background-fill-primary);
  border: 4px solid transparent;
  border-radius: var(--radius-lg);
  background-clip: padding-box; */
}

/* selected buttons have highlighted text */
button.selected {
  color: var(--block-label-text-color);
  font-weight: bold;
}

.message-row.bubble.bot-row{
  overflow-x: auto;
}

.flex-wrap.bot {
  overflow-x: inherit;
}

#chat-tab,
#indices-tab,
#settings-tab,
#help-tab,
#resources-tab,
#login-tab {
  border: none !important;
}

#help-tab,
#settings-tab {
  /* text-dense view should not be wide for readability */
  max-width: max(56vw, 900px) !important;
  margin: 0 auto !important;
}

.indices-tab,
#resources-tab {
  /* Other view should not be too wide */
  border: none !important;
  max-width: max(70vw, 1200px) !important;
  margin: 0 auto !important;
}

#main-chat-bot {
  background: var(--background-fill-primary);
  flex: auto;
}

#chat-area {
  height: var(--main-area-height) !important;
  column-gap: 2px !important;
}

#chat-info-panel {
  max-height: var(--main-area-height) !important;
  overflow: auto !important;
  transition: all 0.4s;
}

body.dark #chat-info-panel figure>img{
  filter: invert(100%);
}

/* conversation settings panel: scrolls vertically with its scrollbar hidden */
#conv-settings-panel {
  max-height: var(--main-area-height) !important;
  flex-wrap: unset;
  overflow-y: scroll !important;
  position: sticky;
  column-gap: 2px !important;
  scrollbar-width: none;
  /* ^ hides the scrollbar in Firefox */
  -ms-overflow-style: none;
  /* ^ hides the scrollbar in Internet Explorer 10+ */
  transition: all 0.3s;
}

#conv-settings-panel::-webkit-scrollbar {
  /* WebKit */
  width: 0;
  height: 0;
}

td {
  /* Fix for Firefox Gradio table overflow display */
  overflow: hidden;
}

.setting-answer-mode-description {
  margin: 5px 5px 2px !important;
}

.message-buttons-right {
  display: none !important;
}

mark {
  background-color: #10b981;
}

/* clpse */
.clpse {
  background-color: var(--background-fill-secondary);
  font-weight: bold;
  cursor: pointer;
  padding: 3px;
  width: 100%;
  border: none;
  text-align: left;
  outline: none;
}

/* for setting transparent background for elements */
.no-background {
  background-color: transparent;
  border: none;
}

/* for setting bold text for elements */
.bold-text {
  font-weight: bold;
}

/* for setting highlighted text for elements */
.body-text-color {
  color: var(--body-text-color);
}

/* for setting right-aligned buttons */
.right-button {
  min-width: 200px !important;
  width: fit-content;
  padding-left: 20px;
  padding-right: 20px;
  margin: 0px 0px 0px auto;
}

/* for setting height limit for buttons */
.cap-button-height {
  max-height: 42px;
}

/* Hide sort buttons at gr.DataFrame */
.sort-button {
  display: none !important;
}

/* Show sort button only in File list*/
#file_list_view .sort-button {
  display: block !important;
}

#toggle-dark-button {
  position: fixed;
  top: 6px;
  right: 30px;
}

#info-expand-button {
  position: absolute;
  top: 6px;
  right: 15px;
}

/* prevent overflow of html info panel */
#html-info-panel {
  overflow-x: auto !important;
}

#chat-expand-button {
  position: absolute;
  top: 6px;
  right: -10px;
  z-index: 1;
}

#save-setting-btn {
  width: 150px;
  height: 30px;
  min-width: 100px !important;
}

#quick-setting-labels {
  margin-top: 5px;
  margin-bottom: -10px;
}

#use-mindmap-checkbox {
  position: absolute;
  width: 110px;
  top: 10px;
  right: 25px;
}

#citation-dropdown {
  width: min(25%, 100px);
  position: absolute;
  top: 2px;
  left: 120px;
  height: 35px;
}

#quick-url textarea {
  resize: none;
  background: transparent;
  margin-top: 0px;
}

#quick-url textarea::placeholder {
  text-align: center;
}

#quick-file {
  height: 110px;
}

span.icon {
  color: #cecece;
}

.upload-button {
  display: none;
}

.scrollable {
  overflow-y: auto;
}

.fill-main-area-height {
  max-height: var(--main-area-height);
}

.unset-overflow {
  overflow: unset !important;
}

pdfjs-viewer-element {
  height: 100dvh;
}

/* Modal styles */

.modal {
  display: none;
  position: relative;
  z-index: 2;
  left: 0;
  top: 0;
  width: 100%;
  height: 85dvh;
  overflow: hidden;
  background-color: rgba(0, 0, 0, 0.4);
}

/* horizontal padding around the modal's close / expand controls */
.modal-header {
  padding: 0px 10px;
}

.modal-content {
  background-color: #fefefe;
  height: 100%;
  display: flex;
  flex-direction: column;
}

.close {
  color: #aaa;
  align-self: flex-end;
  font-size: 28px;
  font-weight: bold;
}

.close:hover,
.close:focus {
  color: black;
  text-decoration: none;
  cursor: pointer;
}

.modal-body {
  flex: 1;
  overflow: hidden;
}

/* Switch checkbox styles */

/* #is-public-checkbox {
  position: relative;
  top: 4px;
} */

#suggest-chat-checkbox {
  position: relative;
  top: 4px;
}

.switch input {
  position: absolute;
  opacity: 0;
}

.switch {
  display: inline-block;
  /* 1 */
  height: 1em;
  width: 2em;
  background: #8f8f8f;
  border-radius: 1em;
  position: relative;
  top: 2px;
  margin-right: 1em;
}

.switch div {
  height: 1em;
  width: 1em;
  border-radius: 1em;
  background: #FFF;
  box-shadow: 0 0.1em 0.3em rgba(0, 0, 0, 0.3);
  -webkit-transition: all 300ms;
  -moz-transition: all 300ms;
  transition: all 300ms;
}

.switch input:checked+div {
  -webkit-transform: translate3d(100%, 0, 0);
  -moz-transform: translate3d(100%, 0, 0);
  transform: translate3d(100%, 0, 0);
  background: #12df9a;
}

.switch:has(> input:checked) {
  background: #0c895f;
}

/* Bot animation */

.message.bot {
  animation: fadein 1.0s ease-in-out forwards;
}

details.evidence {
  animation: fadein 0.3s ease-in-out forwards;
}

/* fade an element in from fully transparent to fully opaque */
@keyframes fadein {
  0% {
    opacity: 0;
  }

  100% {
    /* unitless number, consistent with the start value */
    opacity: 1;
  }
}

.message a.citation {
  color: #10b981;
  text-decoration: none;
}

/* pop-up for file tag in chat input*/
.tribute-container ul {
  background-color: var(--background-fill-primary) !important;
  color: var(--body-text-color) !important;
  font-family: var(--font);
  font-size: var(--text-md);
}

.tribute-container li.highlight {
  background-color: var(--border-color-primary) !important;
}

/* a fix for flickering background in Gradio DataFrame */
tbody:not(.row_odd) {
  background: var(--table-even-background-fill);
}

#chat-suggestion {
  max-height: 350px;
}

#chat-suggestion table {
  overflow: hidden;
}

#chat-suggestion table thead {
  display: none;
}

#paper-suggestion table {
  overflow: hidden;
}

svg.markmap {
  width: 100%;
  height: 100%;
  font-family: Quicksand, sans-serif;
  font-size: 15px;
}

div.markmap {
  height: 400px;
}

#google-login {
  max-width: 450px;
}

#user-api-key-wrapper {
  max-width: 450px;
}

#login-row {
  display: grid;
  place-items: center;
}


================================================
FILE: libs/ktem/ktem/assets/js/main.js
================================================
/**
 * Page set-up script: re-arranges a few rendered elements and installs the
 * helper functions (on `globalThis`) used by the rest of the UI.
 *
 * The literal "KH_APP_VERSION" below is substituted on the Python side before
 * this script is sent to the browser.
 */
function run() {
  const EVIDENCE_SELECTOR =
    "#html-info-panel > div:last-child > div > details.evidence div.evidence-content";

  // Escape HTML special characters so plain text can be embedded in an
  // innerHTML assignment without being parsed as markup.
  const escapeHtml = (text) =>
    text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  // True when the PDF modal exists and is currently displayed.
  const isPdfModalOpen = () => {
    const modal = document.getElementById("pdf-modal");
    return Boolean(modal) && modal.style.display == "block";
  };

  // Click the first ".pdf-link" inside the closest enclosing <details> of
  // `node`; does nothing when either element is missing.
  const openPdfPreview = (node) => {
    const details = node ? node.closest("details") : null;
    const pdf_link = details ? details.querySelector(".pdf-link") : null;
    if (pdf_link) {
      pdf_link.click();
    }
  };

  // Resolve after `ms` milliseconds.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  let main_parent = document.getElementById("chat-tab").parentNode;

  // the first child node of the tab container is styled as the header bar
  main_parent.childNodes[0].classList.add("header-bar");
  main_parent.style = "padding: 0; margin: 0";
  main_parent.parentNode.style = "gap: 0";
  main_parent.parentNode.parentNode.style = "padding: 0";

  // show the app version in the top-right corner (inserted as plain text)
  const version_node = document.createElement("p");
  version_node.textContent = "version: KH_APP_VERSION";
  version_node.style = "position: fixed; top: 10px; right: 10px;";
  main_parent.appendChild(version_node);

  // add favicon
  const favicon = document.createElement("link");
  // set favicon attributes
  favicon.rel = "icon";
  favicon.type = "image/svg+xml";
  favicon.href = "/favicon.ico";
  document.head.appendChild(favicon);

  // setup conversation dropdown placeholder
  let conv_dropdown = document.querySelector("#conversation-dropdown input");
  conv_dropdown.placeholder = "Browse conversation";

  // move info-expand-button
  let info_expand_button = document.getElementById("info-expand-button");
  let chat_info_panel = document.getElementById("info-expand");
  chat_info_panel.insertBefore(
    info_expand_button,
    chat_info_panel.childNodes[2]
  );

  // move toggle-side-bar button
  let chat_expand_button = document.getElementById("chat-expand-button");
  let chat_column = document.getElementById("main-chat-bot");
  let conv_column = document.getElementById("conv-settings-panel");

  // move setting close button
  let setting_tab_nav_bar = document.querySelector("#settings-tab .tab-nav");
  let setting_close_button = document.getElementById("save-setting-btn");
  if (setting_close_button) {
    setting_tab_nav_bar.appendChild(setting_close_button);
  }

  let default_conv_column_min_width = "min(300px, 100%)";
  conv_column.style.minWidth = default_conv_column_min_width;

  // Collapse / expand the conversation settings column.
  globalThis.toggleChatColumn = () => {
    /* the inline flex-grow value of conv_column tells whether it is collapsed */
    if (conv_column.style.flexGrow == "0") {
      conv_column.style.flexGrow = "1";
      conv_column.style.minWidth = default_conv_column_min_width;
    } else {
      conv_column.style.flexGrow = "0";
      conv_column.style.minWidth = "0px";
    }
  };

  chat_column.insertBefore(chat_expand_button, chat_column.firstChild);

  // move use mind-map checkbox
  let mindmap_checkbox = document.getElementById("use-mindmap-checkbox");
  let citation_dropdown = document.getElementById("citation-dropdown");
  let chat_setting_panel = document.getElementById("chat-settings-expand");
  chat_setting_panel.insertBefore(
    mindmap_checkbox,
    chat_setting_panel.childNodes[2]
  );
  chat_setting_panel.insertBefore(citation_dropdown, mindmap_checkbox);

  // move share conv checkbox
  let report_div = document.querySelector(
    "#report-accordion > div:nth-child(3) > div:nth-child(1)"
  );
  let share_conv_checkbox = document.getElementById("is-public-checkbox");
  if (share_conv_checkbox) {
    report_div.insertBefore(share_conv_checkbox, report_div.querySelector("button"));
  }

  // create slider toggle out of the suggest-chat checkbox
  const suggest_chat_checkbox = document.getElementById("suggest-chat-checkbox");
  const label_element = suggest_chat_checkbox.getElementsByTagName("label")[0];
  const checkbox_span = suggest_chat_checkbox.getElementsByTagName("span")[0];
  const new_div = document.createElement("div");

  label_element.classList.add("switch");
  suggest_chat_checkbox.appendChild(checkbox_span);
  label_element.appendChild(new_div);

  // clpse: toggle the element that follows the button "clpse-btn-<id>"
  globalThis.clpseFn = (id) => {
    const button = document.getElementById("clpse-btn-" + id);
    button.classList.toggle("clpse-active");
    const content = button.nextElementSibling;
    if (content.style.display === "none") {
      content.style.display = "block";
    } else {
      content.style.display = "none";
    }
  };

  // store info in local storage
  globalThis.setStorage = (key, value) => {
    localStorage.setItem(key, value);
  };
  // read from local storage; `value` is returned when nothing (or an empty
  // string) is stored under `key`
  globalThis.getStorage = (key, value) => {
    const item = localStorage.getItem(key);
    return item ? item : value;
  };
  globalThis.removeFromStorage = (key) => {
    localStorage.removeItem(key);
  };

  // Scroll to the <mark> whose id matches the clicked citation link; when the
  // PDF modal is open, trigger the PDF preview of the enclosing evidence
  // instead.
  globalThis.scrollToCitation = async (event) => {
    event.preventDefault(); // Prevent the default link behavior
    const citationId = event.target.getAttribute("id");

    await sleep(100); // Sleep for 100 milliseconds

    const citation = document.querySelector('mark[id="' + citationId + '"]');
    if (!citation) {
      // nothing to scroll to or preview
      return;
    }

    if (isPdfModalOpen()) {
      // trigger on click event of PDF Preview link
      openPdfPreview(citation);
    } else {
      citation.scrollIntoView({ behavior: "smooth" });
    }
  };

  // Index the sentences of the currently opened evidences and, when text is
  // selected in the last bot message, highlight the best matching sentence.
  globalThis.fullTextSearch = () => {
    // Assign text selection event to last bot message
    const bot_messages = document.querySelectorAll(
      "div#main-chat-bot div.message-row.bot-row"
    );
    const last_bot_message = bot_messages[bot_messages.length - 1];

    // nothing to do without a bot message, or when the last bot message was
    // already set up (it then carries the class "text_selection")
    if (
      !last_bot_message ||
      last_bot_message.classList.contains("text_selection")
    ) {
      return;
    }

    // assign new class to last message
    last_bot_message.classList.add("text_selection");

    // Get sentences from evidence div
    const indexed_evidences = document.querySelectorAll(EVIDENCE_SELECTOR);
    console.log("Indexing evidences", indexed_evidences);

    const segmenterEn = new Intl.Segmenter("en", { granularity: "sentence" });
    // Split sentences and save to all_segments list
    const all_segments = [];
    for (const evidence of indexed_evidences) {
      // skip evidences whose <details> tag is closed
      if (!evidence.parentElement.open) {
        continue;
      }
      // skip evidences that contain a mind-map
      if (evidence.querySelector("div.markmap")) {
        continue;
      }

      const evidence_content = evidence.textContent.replace(/[\r\n]+/g, " ");
      for (const sentence of segmenterEn.segment(evidence_content)) {
        const segment = sentence.segment.trim();
        if (segment) {
          all_segments.push({
            id: all_segments.length,
            text: segment,
          });
        }
      }
    }

    let miniSearch = new MiniSearch({
      fields: ["text"], // fields to index for full-text search
      storeFields: ["text"],
    });

    // Index all documents
    miniSearch.addAll(all_segments);

    last_bot_message.addEventListener("mouseup", () => {
      let selection = window.getSelection().toString();
      let results = miniSearch.search(selection);

      if (results.length == 0) {
        return;
      }
      let matched_text = results[0].text;
      console.log("query\n", selection, "\nmatched text\n", matched_text);

      const evidences = document.querySelectorAll(EVIDENCE_SELECTOR);

      // convert all <mark> in evidences to normal text (as a text node, so
      // the text is never re-parsed as HTML)
      evidences.forEach((evidence) => {
        evidence.querySelectorAll("mark").forEach((mark) => {
          mark.replaceWith(document.createTextNode(mark.innerText));
        });
      });

      // highlight matched_text in evidences
      for (const evidence of evidences) {
        const evidence_content = evidence.textContent.replace(/[\r\n]+/g, " ");
        if (!evidence_content.includes(matched_text)) {
          continue;
        }
        // look at all p and li elements, highlight the first one that matches
        for (const p of evidence.querySelectorAll("p, li")) {
          const p_content = p.textContent.replace(/[\r\n]+/g, " ");
          const match_start = p_content.indexOf(matched_text);
          if (match_start < 0) {
            continue;
          }
          const match_end = match_start + matched_text.length;
          // p_content is plain text: escape it before writing it back as HTML
          p.innerHTML =
            escapeHtml(p_content.slice(0, match_start)) +
            "<mark>" +
            escapeHtml(matched_text) +
            "</mark>" +
            escapeHtml(p_content.slice(match_end));
          console.log("highlighted", matched_text, "in", p);
          if (isPdfModalOpen()) {
            // trigger on click event of PDF Preview link
            openPdfPreview(p);
          } else {
            p.scrollIntoView({ behavior: "smooth", block: "center" });
          }
          break;
        }
      }
    });
  };

  // Open a window and write `content` into it. `options` may override
  // `window` (window features), `closeChild` and `childId` (window target).
  // Returns the opened window, or undefined when nothing was opened.
  globalThis.spawnDocument = (content, options) => {
    let opt = {
      window: "",
      closeChild: true,
      childId: "_blank",
    };
    Object.assign(opt, options);
    // minimal error checking
    if (
      content &&
      typeof content.toString == "function" &&
      content.toString().length
    ) {
      let child = window.open("", opt.childId, opt.window);
      if (!child) {
        // window.open returns null when the window could not be opened
        // (e.g. blocked pop-up)
        return;
      }
      child.document.write(content.toString());
      if (opt.closeChild) child.document.close();
      return child;
    }
  };

  // Fill the chat input with "Explain <text of the clicked element>".
  globalThis.fillChatInput = (event) => {
    let chatInput = document.querySelector("#chat-input textarea");
    // fill the chat input with the clicked div text
    chatInput.value = "Explain " + event.target.textContent;
    // notify listeners that the value changed programmatically
    chatInput.dispatchEvent(new Event("input", { bubbles: true }));
    chatInput.focus();
  };
}


================================================
FILE: libs/ktem/ktem/assets/js/pdf_viewer.js
================================================
function onBlockLoad() {
  var infor_panel_scroll_pos = 0;
  globalThis.createModal = () => {
    // Fill the existing #pdf-modal container with the PDF viewer markup and
    // wire up its close / expand buttons.
    const modalEl = document.getElementById("pdf-modal");
    // inline styles captured when expanding, restored when collapsing
    const savedStyle = { position: null, width: null, left: null };
    let isExpanded = false;

    modalEl.id = "pdf-modal";
    modalEl.className = "modal";
    modalEl.innerHTML = `
            <div class="modal-content">
              <div class="modal-header">
                <span class="close" id="modal-close">&times;</span>
                <span class="close" id="modal-expand">&#x26F6;</span>
              </div>
              <div class="modal-body">
                <pdfjs-viewer-element id="pdf-viewer" viewer-path="GR_FILE_ROOT_PATH/file=PDFJS_PREBUILT_DIR" locale="en" phrase="true">
                </pdfjs-viewer-element>
              </div>
            </div>
          `;

    // Close: hide the modal, show the info panel again and restore the scroll
    // offset that was recorded in infor_panel_scroll_pos.
    const closeButton = modalEl.querySelector("#modal-close");
    closeButton.onclick = () => {
      modalEl.style.display = "none";
      const infoPanel = document.getElementById("html-info-panel");
      if (infoPanel) {
        infoPanel.style.display = "block";
      }
      const scrollPanel = document.getElementById("chat-info-panel");
      scrollPanel.scrollTop = infor_panel_scroll_pos;
    };

    // Expand: toggle between the inline layout and a fixed, centred 70% wide
    // overlay.
    const expandButton = modalEl.querySelector("#modal-expand");
    expandButton.onclick = () => {
      const style = modalEl.style;
      isExpanded = !isExpanded;

      if (!isExpanded) {
        style.position = savedStyle.position;
        style.width = savedStyle.width;
        style.left = savedStyle.left;
        style.height = "85dvh";
        return;
      }

      savedStyle.position = style.position;
      savedStyle.left = style.left;
      savedStyle.width = style.width;

      style.position = "fixed";
      style.width = "70%";
      style.left = "15%";
      style.height = "100dvh";
    };
  };

  /**
   * Similarity score between two strings: the length of their longest common
   * substring divided by the length of the shorter string.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number} A value in [0, 1]; 0 when either string is empty
   *   (the unguarded division would otherwise produce NaN).
   */
  function matchRatio(str1, str2) {
    const n = str1.length;
    const m = str2.length;
    if (n === 0 || m === 0) {
      return 0;
    }

    // Longest-common-substring DP. Only the previous row is ever read, so two
    // rows are enough; only the best run length is needed, not the substring.
    let prevRow = new Array(m + 1).fill(0);
    let best = 0;
    for (let i = 0; i < n; i++) {
      const currRow = new Array(m + 1).fill(0);
      for (let j = 0; j < m; j++) {
        if (str1[i] === str2[j]) {
          const run = prevRow[j] + 1;
          currRow[j + 1] = run;
          if (run > best) {
            best = run;
          }
        }
      }
      prevRow = currRow;
    }

    return best / Math.min(n, m);
  }

  /**
   * Highlight the text-layer spans of one PDF page that resemble any of the
   * given phrases, and clear highlights from spans that no longer match.
   *
   * @param {string[]} search_phrases - Phrases to look for.
   * @param {string|number} page_label - Value matched against the
   *   `data-page-number` attribute of the page container.
   */
  globalThis.compareText = (search_phrases, page_label) => {
    var iframe = document.querySelector("#pdf-viewer").iframe;
    var innerDoc = iframe.contentDocument
      ? iframe.contentDocument
      : iframe.contentWindow.document;

    var renderedPages = innerDoc.querySelectorAll("div#viewer div.page");
    if (renderedPages.length == 0) {
      // if pages are not rendered yet, wait and try again
      setTimeout(() => compareText(search_phrases, page_label), 2000);
      return;
    }

    var query_selector =
      "#viewer > div[data-page-number='" +
      page_label +
      "'] > div.textLayer > span";
    var page_spans = innerDoc.querySelectorAll(query_selector);
    for (var i = 0; i < page_spans.length; i++) {
      var span = page_spans[i];
      var text = span.textContent;
      var is_match =
        text.length > 4 &&
        search_phrases.some((phrase) => matchRatio(phrase, text) > 0.5);

      if (is_match) {
        // Build the wrapper with DOM APIs instead of innerHTML: the text comes
        // from the PDF and must not be parsed as markup ("<", "&", ...).
        var mark = innerDoc.createElement("span");
        mark.className = "highlight selected";
        mark.textContent = text;
        span.textContent = "";
        span.appendChild(mark);
      } else if (span.querySelector(".highlight")) {
        // if span is already highlighted, remove it (again as plain text)
        span.textContent = text;
      }
    }
  };

  // Promise-based delay: the returned promise resolves once `ms` milliseconds
  // have passed, so callers can `await sleep(ms)`.
  function sleep(ms) {
    return new Promise(function (done) {
      setTimeout(done, ms);
    });
  }

  /**
   * Click handler for ".pdf-link" elements: show the PDF modal for the linked
   * document/page and highlight the cited passages inside it.
   *
   * Reads `data-src` and `data-page` from the clicked element and collects the
   * text of every <mark> found under the element's grandparent as the phrases
   * to highlight.
   *
   * @param {Event} event - Click event; `currentTarget` is the link element.
   */
  globalThis.openModal = async (event) => {
    event.preventDefault();
    var target = event.currentTarget;
    var src = target.getAttribute("data-src");
    var page = target.getAttribute("data-page");
    var highlighted_spans =
      target.parentElement.parentElement.querySelectorAll("mark");

    // Get text from highlighted spans, stripping 【id】 citation markers
    var search_phrases = Array.from(highlighted_spans).map((span) =>
      span.textContent.replace(/【\d+】/g, "")
    );

    var pdfViewer = document.getElementById("pdf-viewer");

    // Declared locally: assigning to an undeclared name creates an implicit
    // global (and throws a ReferenceError in strict mode).
    var current_src = pdfViewer.getAttribute("src");
    if (current_src != src) {
      // only touch "src" when the document actually changes
      pdfViewer.setAttribute("src", src);
    }
    pdfViewer.setAttribute("page", page);

    // remember where the info panel was scrolled; the close button restores it
    var scrollableDiv = document.getElementById("chat-info-panel");
    infor_panel_scroll_pos = scrollableDiv.scrollTop;

    var modal = document.getElementById("pdf-modal");
    modal.style.display = "block";
    var info_panel = document.getElementById("html-info-panel");
    if (info_panel) {
      info_panel.style.display = "none";
    }
    scrollableDiv.scrollTop = 0;

    /* search for text inside PDF page */
    await sleep(500);
    compareText(search_phrases, page);
  };

  globalThis.assignPdfOnclickEvent = () => {
    // Route clicks on every ".pdf-link" element to openModal
    for (const link of document.getElementsByClassName("pdf-link")) {
      link.onclick = openModal;
    }
  };

  // Build the modal only once: skip when the #pdf-viewer element already exists
  if (document.getElementById("pdf-viewer") == null) {
    createModal();
  }
}


================================================
FILE: libs/ktem/ktem/assets/md/about.md
================================================
# About Kotaemon

An open-source tool for you to chat with your documents.

[Source Code](https://github.com/Cinnamon/kotaemon) |
[Demo](https://huggingface.co/spaces/cin-model/kotaemon-demo)

[User Guide](https://cinnamon.github.io/kotaemon/) |
[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |
[Feedback](https://github.com/Cinnamon/kotaemon/issues)


================================================
FILE: libs/ktem/ktem/assets/md/changelogs.md
================================================
# Changelogs

## v0.0.1

- Chat: interact with chatbot with simple pipeline, rewoo and react agents
- Chat: conversation management: create, delete, rename conversations
- Files: upload files
- Files: select files as context for chatbot
- User management: create, sign-in, sign-out, change password
- Setting: common settings and pipeline-based settings
- Info panel: show Cinnamon AI and Kotaemon information


================================================
FILE: libs/ktem/ktem/assets/md/usage.md
================================================
# Basic Usage

## 1. Add your AI models

![resources tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/resources-tab.png)

- The tool uses Large Language Models (LLMs) to perform various tasks in a QA pipeline.
  So, you need to provide the application with access to the LLMs you want
  to use.
- You only need to provide at least one. However, it is recommended that you include all the LLMs
  that you have access to, so that you can switch between them while using the
  application.

To add a model:

1. Navigate to the `Resources` tab.
2. Select the `LLMs` sub-tab.
3. Select the `Add` sub-tab.
4. Config the model to add:
   - Give it a name.
   - Pick a vendor/provider (e.g. `ChatOpenAI`).
   - Provide the specifications.
   - (Optional) Set the model as default.
5. Click `Add` to add the model.
6. Select the `Embedding Models` sub-tab and repeat steps 3 to 5 to add an embedding model.

<details markdown>

<summary>(Optional) Configure model via the .env file</summary>

Alternatively, you can configure the models via the `.env` file with the information needed to connect to the LLMs. This file is located in
the folder of the application. If you don't see it, you can create one.

Currently, the following providers are supported:

### OpenAI

In the `.env` file, set the `OPENAI_API_KEY` variable with your OpenAI API key in order
to enable access to OpenAI's models. There are other variables that can be modified;
please feel free to edit them to fit your case. Otherwise, the default parameters should
work for most people.

```shell
OPENAI_API_BASE=https://api.openai.com/v1
OPENAI_API_KEY=<your OpenAI API key here>
OPENAI_CHAT_MODEL=gpt-3.5-turbo
OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
```

### Azure OpenAI

For OpenAI models via the Azure platform, you need to provide your Azure endpoint and API
key. You might also need to provide your deployment names for the chat model and the
embedding model, depending on how you set up your Azure deployments.

```shell
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_API_KEY=
OPENAI_API_VERSION=2024-02-15-preview
AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
```

### Local models

- Pros:
  - Privacy. Your documents will be stored and processed locally.
  - Choices. There are a wide range of LLMs in terms of size, domain, language to choose
    from.
  - Cost. It's free.
- Cons:
  - Quality. Local models are much smaller and thus have lower generative quality than
    paid APIs.
  - Speed. Local models are deployed using your machine so the processing speed is
    limited by your hardware.

#### Find and download a LLM

You can search for and download an LLM to be run locally from the [Hugging Face
Hub](https://huggingface.co/models). Currently, these model formats are supported:

- GGUF

You should choose a model whose size is less than your device's available memory, leaving
about 2 GB free. For example, if you have 16 GB of RAM in total, of which 12 GB is available,
then you should choose a model that takes up at most 10 GB of RAM. Bigger models tend to
give better generation but also take more processing time.

Here are some recommendations and their size in memory:

- [Qwen1.5-1.8B-Chat-GGUF](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf?download=true):
  around 2 GB

#### Enable local models

To add a local model to the model pool, set the `LOCAL_MODEL` variable in the `.env`
file to the path of the model file.

```shell
LOCAL_MODEL=<full path to your model file>
```

Here is how to get the full path of your model file:

- On Windows 11: right click the file and select `Copy as Path`.
</details>

## 2. Upload your documents

![file index tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/file-index-tab.png)

In order to do QA on your documents, you need to upload them to the application first.
Navigate to the `File Index` tab and you will see 2 sections:

1. File upload:
   - Drag and drop your file to the UI or select it from your file system.
     Then click `Upload and Index`.
   - The application will take some time to process the file and show a message once it is done.
2. File list:
   - This section shows the list of files that have been uploaded to the application and allows users to delete them.

## 3. Chat with your documents

![chat tab](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/chat-tab.png)

Now navigate back to the `Chat` tab. The chat tab is divided into 3 regions:

1. Conversation Settings Panel
   - Here you can select, create, rename, and delete conversations.
     - By default, a new conversation is created automatically if no conversation is selected.
   - Below that you have the file index, where you can choose whether to disable, select all files, or select which files to retrieve references from.
     - If you choose "Disabled", no files will be considered as context during chat.
     - If you choose "Search All", all files will be considered during chat.
     - If you choose "Select", a dropdown will appear for you to select the
       files to be considered during chat. If no files are selected, then no
       files will be considered during chat.
2. Chat Panel
   - This is where you can chat with the chatbot.
3. Information Panel
   - Supporting information such as the retrieved evidence and reference will be
     displayed here.


================================================
FILE: libs/ktem/ktem/assets/theme.py
================================================
from typing import Iterable

from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes

# Custom neutral palette, used below as the default `neutral_hue` of the Kotaemon
# theme. Shades run from near-white (c50) to near-black (c950). Note that the
# palette is registered under the name "dark" although the variable is `gray`.
gray = colors.Color(
    name="dark",
    c50="#f9fafb",
    c100="#edeef0",
    c200="#e1e2e6",
    c300="#d5d6dd",
    c400="#cacbd5",
    c500="#acadb7",
    c600="#313138",
    c700="#25252b",
    c800="#19191e",
    c900="#0d0d11",
    c950="#010104",
)

# Error text colour; not referenced by the theme dictionaries in this module.
err_txt = "#f05656"
# Gradients used for the primary button background (normal / hover states).
# The "*name" tokens are left as-is for the theme to resolve.
gradient = "linear-gradient(90deg, *primary_400 20%, *secondary_500 80%)"
gradient_muted = "linear-gradient(90deg, *primary_500 20%, *secondary_600 80%)"

# Error / cancel-button colours for dark mode (solid and 75% opacity).
err_dark = "rgba(228, 98, 98, 1)"
err_dark_muted = "rgba(228, 98, 98, 0.75)"

# Error / cancel-button colours for light mode.
err = "rgba(255, 93, 93, 1)"
err_muted = "rgba(237, 80, 80, 1)"


# Theme overrides that do not depend on the colour scheme.
common = {
    # element colours
    "color_accent": "*primary_400",
    # shadows
    "shadow_drop": "0 0px 5px 1px rgb(0 0 0 / 0.05)",
    "shadow_drop_lg": "0 0 10px 3px rgba(0 0 0 / 0.06)",
    # layout atoms
    "block_label_margin": "*spacing_xl",
    "block_label_padding": "*spacing_xl",
    "block_label_shadow": "none",
    "layout_gap": "*spacing_xxl",
    "section_header_text_size": "*text_lg",
    # buttons
    "button_shadow": "none",
    "button_shadow_active": "*shadow_drop",
    "button_shadow_hover": "none",
    # button_large_radius / button_small_radius ("*radius_xxl") are
    # intentionally left unset.
}
# Theme overrides applied in dark mode (every key carries the "_dark" suffix).
dark_mode = {
    # body attributes
    "body_text_color_subdued_dark": "*neutral_300",
    # element colours
    "background_fill_secondary_dark": "*neutral_950",
    "border_color_accent_dark": "rgba(255,255,255,0)",
    "border_color_primary_dark": "*neutral_600",
    "color_accent_soft_dark": "*secondary_400",
    # text
    "link_text_color_dark": "*secondary_200",
    "link_text_color_active_dark": "*secondary_300",
    "link_text_color_visited_dark": "*secondary_400",
    # layout atoms
    "block_label_background_fill_dark": "*neutral_800",
    "block_label_border_width_dark": "0px",
    "block_label_text_color_dark": "*primary_200",
    "block_shadow_dark": "none",
    "block_title_text_color_dark": "*primary_200",
    "panel_border_width_dark": "0px",
    # component atoms
    "checkbox_background_color_selected_dark": "*primary_400",
    "checkbox_border_color_focus_dark": "*primary_400",
    "checkbox_border_color_selected_dark": "*primary_500",
    "checkbox_label_background_fill_selected_dark": "*primary_200",
    "checkbox_label_text_color_selected_dark": "*neutral_700",
    "error_border_color_dark": err_dark,
    "error_text_color_dark": "*neutral_100",
    "error_icon_color_dark": err_dark,
    "input_background_fill_dark": "*neutral_600",
    "input_border_color_dark": "*input_background_fill",
    "input_border_color_focus_dark": "*input_background_fill",
    "input_placeholder_color_dark": "*neutral_500",
    "loader_color_dark": "*primary_200",
    "slider_color_dark": "*primary_300",
    "stat_background_fill_dark": "*secondary_100",
    "table_border_color_dark": "*neutral_800",
    "table_even_background_fill_dark": "*neutral_900",
    "table_odd_background_fill_dark": "*neutral_800",
    "table_row_focus_dark": "*neutral_600",
    # buttons
    "button_primary_background_fill_dark": gradient,
    "button_primary_background_fill_hover_dark": gradient_muted,
    "button_secondary_background_fill_hover_dark": "*neutral_700",
    "button_cancel_background_fill_dark": err_dark,
    "button_cancel_background_fill_hover_dark": err_dark_muted,
}
# Theme overrides applied in light mode (keys without the "_dark" suffix).
light_mode = {
    "background_fill_primary": "*neutral_50",
    "background_fill_secondary": "*neutral_50",
    # body attributes
    "body_background_fill": "*background_fill_primary",
    "body_text_color_subdued": "*neutral_600",
    "border_color_accent": "rgba(255,255,255,0)",
    "border_color_primary": "*neutral_300",
    "color_accent_soft": "*secondary_100",
    # text
    "link_text_color": "*secondary_400",
    "link_text_color_visited": "*secondary_700",
    # layout atoms
    "block_label_border_width": "0px",
    "block_label_background_fill": "white",
    "block_label_text_color": "*primary_600",
    "block_shadow": "none",
    "block_title_text_color": "*primary_600",
    "panel_border_width": "0px",
    # component atoms
    "checkbox_background_color_selected": "*primary_400",
    "checkbox_border_color_focus": "*primary_400",
    "checkbox_border_color_selected": "*primary_400",
    "checkbox_label_border_color": "*primary_200",
    "error_background_fill": "*background_fill_primary",
    "error_border_color": err_muted,
    "error_text_color": "*neutral_800",
    "input_background_fill": "*neutral_200",
    "input_border_color": "*input_background_fill",
    "input_border_color_focus": "*input_background_fill",
    "input_placeholder_color": "*neutral_500",
    "loader_color": "*primary_300",
    "slider_color": "*primary_400",
    "stat_background_fill": "*secondary_300",
    "table_even_background_fill": "*neutral_100",
    "table_odd_background_fill": "*neutral_300",
    "table_row_focus": "*secondary_200",
    # buttons
    "button_primary_background_fill": gradient_muted,
    "button_primary_background_fill_hover": gradient,
    "button_secondary_background_fill": "*neutral_300",
    "button_secondary_background_fill_hover": "*neutral_100",
    "button_cancel_background_fill": err_muted,
    "button_cancel_background_fill_hover": err,
    "button_cancel_text_color": "*neutral_50",
}


class Kotaemon(Soft):
    """
    Official theme of Kotaemon, derived from gradio's ``Soft`` theme.

    Public version: https://huggingface.co/spaces/lone17/kotaemon
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        """Create the theme.

        All arguments are keyword-only and are forwarded unchanged to
        ``Soft.__init__``; the module-level ``common``, ``dark_mode`` and
        ``light_mode`` overrides are then applied through ``set``.
        """
        base_options = dict(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        super().__init__(**base_options)
        self.name = "kotaemon"
        # layer the Kotaemon-specific variable overrides on top of Soft
        super().set(**common, **dark_mode, **light_mode)


================================================
FILE: libs/ktem/ktem/components.py
================================================
"""Common components, some kind of config"""

import logging
from functools import cache
from pathlib import Path
from typing import Optional

from theflow.settings import settings
from theflow.utils.modules import deserialize

from kotaemon.base import BaseComponent
from kotaemon.storages import BaseDocumentStore, BaseVectorStore

logger = logging.getLogger(__name__)


# Directory named by the KH_FILESTORAGE_PATH setting. It is created (including
# missing parents) as a side effect of importing this module.
filestorage_path = Path(settings.KH_FILESTORAGE_PATH)
filestorage_path.mkdir(parents=True, exist_ok=True)


@cache
def get_docstore(collection_name: str = "default") -> BaseDocumentStore:
    """Return the document store for ``collection_name``.

    The store is deserialized from a copy of ``settings.KH_DOCSTORE`` with its
    ``collection_name`` entry set to the requested value. Results are memoized
    per collection name by ``functools.cache``.
    """
    from copy import deepcopy

    # work on a copy so the shared settings object is never mutated
    store_spec = deepcopy(settings.KH_DOCSTORE)
    store_spec["collection_name"] = collection_name

    docstore = deserialize(store_spec, safe=False)
    return docstore


@cache
def get_vectorstore(collection_name: str = "default") -> BaseVectorStore:
    """Return the vector store for ``collection_name``.

    The store is deserialized from a copy of ``settings.KH_VECTORSTORE`` with
    its ``collection_name`` entry set to the requested value. Results are
    memoized per collection name by ``functools.cache``.
    """
    from copy import deepcopy

    # work on a copy so the shared settings object is never mutated
    store_spec = deepcopy(settings.KH_VECTORSTORE)
    store_spec["collection_name"] = collection_name

    vectorstore = deserialize(store_spec, safe=False)
    return vectorstore


class ModelPool:
    """Represent a pool of models

    The pool is seeded from ``conf``, a mapping of model name to a dict with a
    ``"spec"`` entry (deserialized into the model) and the optional entries
    ``"default"``, ``"accuracy"`` and ``"cost"``. Models can later be added or
    removed with item assignment / deletion; the selection helpers always
    reflect the models currently in the pool.
    """

    def __init__(self, category: str, conf: dict):
        """Build the pool.

        Args:
            category: label of the pool (returned by `settings()`)
            conf: mapping of model name to its configuration dict
        """
        self._category = category
        self._conf = conf

        self._models: dict[str, BaseComponent] = {}
        self._default: list[str] = []

        for name, model in conf.items():
            self._models[name] = deserialize(model["spec"], safe=False)
            if model.get("default", False):
                self._default.append(name)

        # Names ordered by ascending accuracy / ascending cost. Models without
        # a rating sort as the least accurate and the most expensive.
        self._accuracy: list[str] = sorted(
            conf, key=lambda x: conf[x].get("accuracy", float("-inf"))
        )
        self._cost: list[str] = sorted(
            conf, key=lambda x: conf[x].get("cost", float("inf"))
        )

    def _ensure_not_empty(self) -> None:
        """Raise ValueError when the pool currently holds no model"""
        if not self._models:
            raise ValueError("No models in pool")

    def __getitem__(self, key: str) -> BaseComponent:
        """Get model by name"""
        return self._models[key]

    def __setitem__(self, key: str, value: BaseComponent):
        """Set model by name

        A name that is new to the pool has no accuracy/cost rating, so it is
        ranked as the least accurate and the most expensive model.
        """
        if key not in self._models:
            self._accuracy.insert(0, key)
            self._cost.append(key)
        self._models[key] = value

    def __delitem__(self, key: str):
        """Delete model by name

        Raises:
            KeyError: if there is no model with that name
        """
        del self._models[key]
        # keep the selection lists in sync so a removed model is never picked
        for names in (self._default, self._accuracy, self._cost):
            if key in names:
                names.remove(key)

    def __contains__(self, key: str) -> bool:
        """Check if model exists"""
        return key in self._models

    def get(
        self, key: str, default: Optional[BaseComponent] = None
    ) -> Optional[BaseComponent]:
        """Get model by name with default value"""
        return self._models.get(key, default)

    def settings(self) -> dict:
        """Present model pools option for gradio

        Raises:
            ValueError: if the pool is empty
        """
        return {
            "label": self._category,
            "choices": list(self._models.keys()),
            "value": self.get_default_name(),
        }

    def options(self) -> dict:
        """Present a dict of models"""
        return self._models

    def get_random_name(self) -> str:
        """Get the name of random model

        Returns:
            str: random model name in the pool

        Raises:
            ValueError: if the pool is empty
        """
        import random

        self._ensure_not_empty()
        return random.choice(list(self._models))

    def get_default_name(self) -> str:
        """Get the name of default model

        In case there is no default model, choose random model from pool. In
        case there are multiple default models, choose random from them.

        Returns:
            str: model name

        Raises:
            ValueError: if the pool is empty
        """
        self._ensure_not_empty()

        if self._default:
            import random

            return random.choice(self._default)

        return self.get_random_name()

    def get_random(self) -> BaseComponent:
        """Get random model"""
        return self._models[self.get_random_name()]

    def get_default(self) -> BaseComponent:
        """Get default model

        In case there is no default model, choose random model from pool. In
        case there are multiple default models, choose random from them.

        Returns:
            BaseComponent: model
        """
        return self._models[self.get_default_name()]

    def get_highest_accuracy_name(self) -> str:
        """Get the name of model with highest accuracy

        Returns:
            str: model name

        Raises:
            ValueError: if the pool is empty
        """
        self._ensure_not_empty()
        return self._accuracy[-1]

    def get_highest_accuracy(self) -> BaseComponent:
        """Get model with highest accuracy

        Returns:
            BaseComponent: model
        """
        return self._models[self.get_highest_accuracy_name()]

    def get_lowest_cost_name(self) -> str:
        """Get the name of model with lowest cost

        Returns:
            str: model name

        Raises:
            ValueError: if the pool is empty
        """
        self._ensure_not_empty()
        return self._cost[0]

    def get_lowest_cost(self) -> BaseComponent:
        """Get model with lowest cost

        Returns:
            BaseComponent: model
        """
        return self._models[self.get_lowest_cost_name()]


# Module-level registries; both start empty here and are expected to be filled
# by other modules (not visible from this file).
reasonings: dict = {}
# Empty pool labelled "Tools"; entries can be added with `tools[name] = model`.
tools = ModelPool("Tools", {})


================================================
FILE: libs/ktem/ktem/db/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/db/base_models.py
================================================
import datetime
import uuid
from typing import Optional

from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel
from tzlocal import get_localzone


class BaseConversation(SQLModel):
    """Store the chat conversation between the user and the bot

    Attributes:
        id: canonical id to identify the conversation (uuid4 hex string)
        name: human-friendly name of the conversation; defaults to
            "Untitled - <local timestamp>"
        user: the user id
        is_public: boolean flag, False by default (presumably marks the
            conversation as visible to other users -- confirm with callers)
        data_source: the data source of the conversation, stored as JSON
        date_created: the date the conversation was created
        date_updated: the date the conversation was updated
    """

    # passed through to the underlying table definition
    __table_args__ = {"extend_existing": True}

    id: str = Field(
        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
    )
    name: str = Field(
        default_factory=lambda: "Untitled - {}".format(
            datetime.datetime.now(get_localzone()).strftime("%Y-%m-%d %H:%M:%S")
        )
    )
    user: str = Field(default="")  # For now we only have one user

    is_public: bool = Field(default=False)

    # contains messages + current files + chat_suggestions
    data_source: dict = Field(default={}, sa_column=Column(JSON))

    # Both timestamps default to "now" in the local timezone when the record is
    # instantiated. NOTE: nothing in this class refreshes date_updated later;
    # whoever modifies a conversation has to set it explicitly.
    date_created: datetime.datetime = Field(
        default_factory=lambda: datetime.datetime.now(get_localzone())
    )
    date_updated: datetime.datetime = Field(
        default_factory=lambda: datetime.datetime.now(get_localzone())
    )


class BaseUser(SQLModel):
    """Store the user information

    Attributes:
        id: canonical id to identify the user (uuid4 hex string)
        username: the username of the user (unique)
        username_lower: unique companion of `username` (presumably its
            lower-cased form, for case-insensitive lookup -- confirm with the
            code that writes it)
        password: the hashed password of the user
        admin: boolean flag, False by default (presumably grants admin
            rights -- confirm with callers)
    """

    # passed through to the underlying table definition
    __table_args__ = {"extend_existing": True}

    id: str = Field(
        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
    )
    username: str = Field(unique=True)
    username_lower: str = Field(unique=True)
    password: str
    admin: bool = Field(default=False)


class BaseSettings(SQLModel):
    """Record of user settings

    Attributes:
        id: canonical id to identify the settings (uuid4 hex string)
        user: the user id (empty string by default)
        setting: the user settings (in dict/json format), stored in a JSON
            column
    """

    # passed through to the underlying table definition
    __table_args__ = {"extend_existing": True}

    id: str = Field(
        default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
    )
    user: str = Field(default="")
    setting: dict = Field(default={}, sa_column=Column(JSON))


class BaseIssueReport(SQLModel):
    """Store user-reported issues

    Attributes:
        id: canonical id to identify the issue report (integer primary key,
            None until a value is assigned)
        issues: the issues reported by the user, formatted as a dict
        chat: optional JSON dict describing the conversation at the time of the
            report (the field holds a dict, not just a conversation id)
        settings: the user settings at the time of the issue report
        user: the user id
    """

    # passed through to the underlying table definition
    __table_args__ = {"extend_existing": True}

    id: Optional[int] = Field(default=None, primary_key=True)
    issues: dict = Field(default={}, sa_column=Column(JSON))
    chat: Optional[dict] = Field(default=None, sa_column=Column(JSON))
    settings: Optional[dict] = Field(default=None, sa_column=Column(JSON))
    user: Optional[str] = Field(default=None)


================================================
FILE: libs/ktem/ktem/db/engine.py
================================================
from sqlmodel import create_engine
from theflow.settings import settings

# Database engine created once, at import time, from the KH_DATABASE setting.
engine = create_engine(settings.KH_DATABASE)


================================================
FILE: libs/ktem/ktem/db/models.py
================================================
import ktem.db.base_models as base_models
from ktem.db.engine import engine
from sqlmodel import SQLModel
from theflow.settings import settings
from theflow.utils.modules import import_dotted_string

# Each table's base class can be swapped through a setting holding the dotted
# path of a replacement class; otherwise the stock base model is used.
if hasattr(settings, "KH_TABLE_CONV"):
    _base_conv = import_dotted_string(settings.KH_TABLE_CONV, safe=False)
else:
    _base_conv = base_models.BaseConversation

if hasattr(settings, "KH_TABLE_USER"):
    _base_user = import_dotted_string(settings.KH_TABLE_USER, safe=False)
else:
    _base_user = base_models.BaseUser

if hasattr(settings, "KH_TABLE_SETTINGS"):
    _base_settings = import_dotted_string(settings.KH_TABLE_SETTINGS, safe=False)
else:
    _base_settings = base_models.BaseSettings

if hasattr(settings, "KH_TABLE_ISSUE_REPORT"):
    _base_issue_report = import_dotted_string(
        settings.KH_TABLE_ISSUE_REPORT, safe=False
    )
else:
    _base_issue_report = base_models.BaseIssueReport


class Conversation(_base_conv, table=True):  # type: ignore
    """Conversation record

    Concrete table built on `_base_conv`: the class named by
    `settings.KH_TABLE_CONV` when that setting exists, otherwise
    `base_models.BaseConversation`.
    """


class User(_base_user, table=True):  # type: ignore
    """User table

    Concrete table built on `_base_user`: the class named by
    `settings.KH_TABLE_USER` when that setting exists, otherwise
    `base_models.BaseUser`.
    """


class Settings(_base_settings, table=True):  # type: ignore
    """Record of settings

    Concrete table built on `_base_settings`: the class named by
    `settings.KH_TABLE_SETTINGS` when that setting exists, otherwise
    `base_models.BaseSettings`.
    """


class IssueReport(_base_issue_report, table=True):  # type: ignore
    """Record of issues

    Concrete table built on `_base_issue_report`: the class named by
    `settings.KH_TABLE_ISSUE_REPORT` when that setting exists, otherwise
    `base_models.BaseIssueReport`.
    """


# Create the tables at import time unless the KH_ENABLE_ALEMBIC setting is
# truthy (presumably Alembic migrations own the schema then -- confirm).
if not getattr(settings, "KH_ENABLE_ALEMBIC", False):
    SQLModel.metadata.create_all(engine)


================================================
FILE: libs/ktem/ktem/embeddings/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/embeddings/db.py
================================================
from typing import Type

from ktem.db.engine import engine
from sqlalchemy import JSON, Boolean, Column, String
from sqlalchemy.orm import DeclarativeBase
from theflow.settings import settings as flowsettings
from theflow.utils.modules import import_dotted_string


class Base(DeclarativeBase):
    """Declarative base class for the embedding tables defined in this module"""

    pass


class BaseEmbeddingTable(Base):
    """Base table to store embedding models

    Columns:
        name: name of the model; primary key
        spec: JSON specification of the model (empty dict by default)
        default: boolean flag, False by default, marking the default model
    """

    # abstract: no table is created for this class itself, only for subclasses
    __abstract__ = True

    name = Column(String, primary_key=True, unique=True)
    spec = Column(JSON, default={})
    default = Column(Boolean, default=False)


# Base class of the embedding table: replaceable through the KH_EMBEDDING_LLM
# setting (dotted path of a class), otherwise the stock BaseEmbeddingTable.
_base_llm: Type[BaseEmbeddingTable]
if hasattr(flowsettings, "KH_EMBEDDING_LLM"):
    _base_llm = import_dotted_string(flowsettings.KH_EMBEDDING_LLM, safe=False)
else:
    _base_llm = BaseEmbeddingTable


class EmbeddingTable(_base_llm):  # type: ignore
    """Concrete table, named "embedding", holding the embedding model records"""

    __tablename__ = "embedding"


# Create the table at import time unless the KH_ENABLE_ALEMBIC setting is
# truthy (presumably Alembic migrations own the schema then -- confirm).
if not getattr(flowsettings, "KH_ENABLE_ALEMBIC", False):
    EmbeddingTable.metadata.create_all(engine)


================================================
FILE: libs/ktem/ktem/embeddings/manager.py
================================================
from typing import Optional, Type

from sqlalchemy import select
from sqlalchemy.orm import Session
from theflow.settings import settings as flowsettings
from theflow.utils.modules import deserialize

from kotaemon.embeddings.base import BaseEmbeddings

from .db import EmbeddingTable, engine


class EmbeddingManager:
    """Pool of embedding models persisted in the `EmbeddingTable` table.

    The database is the source of truth. Every mutating method (`add`,
    `delete`, `update`) writes to the table and then calls `load()` to rebuild
    the in-memory caches:

    - `_models`: name -> deserialized embedding object. The default model is
      additionally registered under the alias key ``"default"``.
    - `_info`: name -> ``{"name", "spec", "default"}`` as stored in the table.
    - `_default`: name of the default model, or ``""`` when none is flagged.
    - `_vendors`: embedding classes offered when creating a new model.
    """

    def __init__(self):
        self._models: dict[str, BaseEmbeddings] = {}
        self._info: dict[str, dict] = {}
        self._default: str = ""
        self._vendors: list[Type] = []

        # Seed the table from the settings, but only when it is still empty so
        # that models edited through the application are never overwritten.
        if hasattr(flowsettings, "KH_EMBEDDINGS"):
            with Session(engine) as sess:
                count = sess.query(EmbeddingTable).count()
            if not count:
                for name, model in flowsettings.KH_EMBEDDINGS.items():
                    self.add(
                        name=name,
                        spec=model["spec"],
                        default=model.get("default", False),
                    )

        self.load()
        self.load_vendors()

    def load(self):
        """Rebuild the in-memory pool from the database."""
        self._models, self._info, self._default = {}, {}, ""
        with Session(engine) as sess:
            stmt = select(EmbeddingTable)
            items = sess.execute(stmt)

            for (item,) in items:
                self._models[item.name] = deserialize(item.spec, safe=False)
                self._info[item.name] = {
                    "name": item.name,
                    "spec": item.spec,
                    "default": item.default,
                }
                if item.default:
                    # When several rows are flagged, the last one read wins.
                    self._default = item.name
                    self._models["default"] = self._models[item.name]

    def load_vendors(self):
        """Register the embedding classes that can be used to create models."""
        # Imported lazily, inside the method, as in the rest of this module.
        from kotaemon.embeddings import (
            AzureOpenAIEmbeddings,
            FastEmbedEmbeddings,
            LCCohereEmbeddings,
            LCGoogleEmbeddings,
            LCHuggingFaceEmbeddings,
            LCMistralEmbeddings,
            OpenAIEmbeddings,
            TeiEndpointEmbeddings,
            VoyageAIEmbeddings,
        )

        self._vendors = [
            AzureOpenAIEmbeddings,
            OpenAIEmbeddings,
            FastEmbedEmbeddings,
            LCCohereEmbeddings,
            LCHuggingFaceEmbeddings,
            LCGoogleEmbeddings,
            LCMistralEmbeddings,
            TeiEndpointEmbeddings,
            VoyageAIEmbeddings,
        ]

    def __getitem__(self, key: str) -> BaseEmbeddings:
        """Get model by name; raises KeyError when the name is unknown."""
        return self._models[key]

    def __contains__(self, key: str) -> bool:
        """Check if model exists"""
        return key in self._models

    def get(
        self, key: str, default: Optional[BaseEmbeddings] = None
    ) -> Optional[BaseEmbeddings]:
        """Get model by name, returning `default` when the name is unknown."""
        return self._models.get(key, default)

    def settings(self) -> dict:
        """Present model pools option for gradio"""
        return {
            "label": "Embedding",
            "choices": list(self._models.keys()),
            "value": self.get_default_name(),
        }

    def options(self) -> dict:
        """Return the name -> model mapping (the internal dict, not a copy)."""
        return self._models

    def get_random_name(self) -> str:
        """Get the name of a random model.

        The name is drawn from the keys of the pool, which include the
        ``"default"`` alias when a default model is set.

        Returns:
            str: random model name in the pool

        Raises:
            ValueError: if the pool is empty
        """
        import random

        if not self._models:
            raise ValueError("No models in pool")

        return random.choice(list(self._models.keys()))

    def get_default_name(self) -> str:
        """Get the name of the default model.

        In case there is no default model, choose a random model from the pool.
        In case several models are flagged as default in the database, the last
        one read by `load()` is used.

        Returns:
            str: model name

        Raises:
            ValueError: if the pool is empty
        """
        if not self._models:
            raise ValueError("No models in pool")

        if not self._default:
            return self.get_random_name()

        return self._default

    def get_random(self) -> BaseEmbeddings:
        """Get random model"""
        return self._models[self.get_random_name()]

    def get_default(self) -> BaseEmbeddings:
        """Get the default model.

        Follows the same selection rules as `get_default_name()`.

        Returns:
            BaseEmbeddings: model

        Raises:
            ValueError: if the pool is empty
        """
        return self._models[self.get_default_name()]

    def info(self) -> dict:
        """Return the name -> stored record mapping of all models."""
        return self._info

    def add(self, name: str, spec: dict, default: bool):
        """Add a new model to the pool.

        Args:
            name: unique, non-empty name of the model
            spec: serialized model specification
            default: whether the new model becomes the default one

        Raises:
            ValueError: if the name is empty or the database write fails
        """
        if not name:
            raise ValueError("Name must not be empty")

        try:
            with Session(engine) as sess:
                if default:
                    # Turn all models to non-default. This is deliberately not
                    # committed on its own: if the insert below fails, the
                    # transaction is rolled back and the previous default
                    # survives.
                    sess.query(EmbeddingTable).update({"default": False})

                item = EmbeddingTable(name=name, spec=spec, default=default)
                sess.add(item)
                sess.commit()
        except Exception as e:
            raise ValueError(f"Failed to add model {name}: {e}") from e

        self.load()

    def delete(self, name: str):
        """Delete a model from the pool.

        Raises:
            ValueError: if the model does not exist or the deletion fails
        """
        try:
            with Session(engine) as sess:
                item = sess.query(EmbeddingTable).filter_by(name=name).first()
                if item is None:
                    raise ValueError(f"Model {name} not found")
                sess.delete(item)
                sess.commit()
        except Exception as e:
            raise ValueError(f"Failed to delete model {name}: {e}") from e

        self.load()

    def update(self, name: str, spec: dict, default: bool, new_name: str = ""):
        """Update a model in the pool, optionally renaming it.

        The whole change (clearing the previous default, renaming, writing the
        new spec) happens in one transaction, so a failure leaves the stored
        models untouched.

        Args:
            name: current name of the model
            spec: new serialized model specification
            default: whether the model becomes the default one
            new_name: new name of the model; empty or equal to `name` keeps
                the current name

        Raises:
            ValueError: if the name is empty, the new name is already taken,
                the model does not exist, or the database write fails
        """
        if not name:
            raise ValueError("Name must not be empty")

        renaming = bool(new_name) and new_name != name
        if renaming and new_name in self._info:
            raise ValueError(f"Model '{new_name}' already exists. Use a unique name.")

        try:
            with Session(engine) as sess:
                if default:
                    # Turn all models to non-default (same transaction as the
                    # update below; rolled back if anything fails).
                    sess.query(EmbeddingTable).update({"default": False})

                item = sess.query(EmbeddingTable).filter_by(name=name).first()
                if not item:
                    raise ValueError(f"Model {name} not found")
                if renaming:
                    # Rename in place instead of delete + add, so the model is
                    # never lost when the write fails halfway.
                    item.name = new_name
                item.spec = spec
                item.default = default
                sess.commit()
        except Exception as e:
            raise ValueError(f"Failed to update model {name}: {e}") from e

        self.load()

    def vendors(self) -> dict:
        """Return the vendors as a mapping of class qualified name -> class."""
        return {vendor.__qualname__: vendor for vendor in self._vendors}


# Module-level manager instance shared by importers of this module. Creating
# it opens database sessions (and may seed the table), so importing this
# module has side effects.
embedding_models_manager = EmbeddingManager()


================================================
FILE: libs/ktem/ktem/embeddings/ui.py
================================================
from copy import deepcopy

import gradio as gr
import pandas as pd
import yaml
from ktem.app import BasePage
from ktem.utils.file import YAMLNoDateSafeLoader
from theflow.utils.modules import deserialize

from .manager import embedding_models_manager


def format_description(cls):
    """Render a class's docstring and parameters as a Markdown description.

    Args:
        cls: a class exposing `describe()`, whose result holds a "params"
            mapping of parameter name -> dict with the keys "auto_callback",
            "type" and "help".

    Returns:
        str: the class docstring (empty when the class has none), a blank
            line, then a Markdown table with one row per parameter. Parameters
            whose "auto_callback" is a string are left out of the table.
    """
    header = ["| Name | Type | Description |", "| --- | --- | --- |"]
    rows = [
        f"| {key} | {value['type']} | {value['help']} |"
        for key, value in cls.describe()["params"].items()
        if not isinstance(value["auto_callback"], str)
    ]
    # A class without a docstring has `__doc__` set to None; show nothing
    # rather than the literal text "None".
    doc = cls.__doc__ or ""
    return f"{doc}\n\n" + "\n".join(header + rows)


class EmbeddingManagement(BasePage):
    """Gradio page to list, add, edit, test and delete embedding models.

    The "View" tab shows the stored models; selecting a row opens a panel to
    edit or delete the model. The "Add" tab creates a new model from one of
    the vendors known to `embedding_models_manager`.

    The hidden textbox `selected_emb_name` holds the name of the model being
    edited; an empty value means nothing is selected and hides the panel.
    """

    def __init__(self, app):
        self._app = app
        self.spec_desc_default = (
            "# Spec description\n\nSelect a model to view the spec description."
        )
        self.on_building_ui()

    def on_building_ui(self):
        """Build the "View" and "Add" tabs."""
        with gr.Tab(label="View"):
            self.emb_list = gr.DataFrame(
                headers=["name", "vendor", "default"],
                interactive=False,
                column_widths=[30, 40, 30],
            )

            with gr.Column(visible=False) as self._selected_panel:
                self.selected_emb_name = gr.Textbox(value="", visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_default = gr.Checkbox(
                            label="Set default",
                            info=(
                                "Set this Embedding model as default. This default "
                                "Embedding will be used by other components by default "
                                "if no Embedding is specified for such components."
                            ),
                        )
                        self.edit_name = gr.Textbox(
                            label="Name",
                            info="Edit to rename this Embedding model.",
                        )
                        self.edit_spec = gr.Textbox(
                            label="Specification",
                            info="Specification of the Embedding model in YAML format",
                            lines=10,
                        )

                        with gr.Accordion(
                            label="Test connection", visible=False, open=False
                        ) as self._check_connection_panel:
                            with gr.Row():
                                with gr.Column(scale=4):
                                    self.connection_logs = gr.HTML(
                                        "Logs",
                                    )

                                with gr.Column(scale=1):
                                    self.btn_test_connection = gr.Button("Test")

                        with gr.Row(visible=False) as self._selected_panel_btn:
                            with gr.Column():
                                self.btn_edit_save = gr.Button(
                                    "Save", min_width=10, variant="primary"
                                )
                            with gr.Column():
                                self.btn_delete = gr.Button(
                                    "Delete", min_width=10, variant="stop"
                                )
                                with gr.Row():
                                    self.btn_delete_yes = gr.Button(
                                        "Confirm Delete",
                                        variant="stop",
                                        visible=False,
                                        min_width=10,
                                    )
                                    self.btn_delete_no = gr.Button(
                                        "Cancel", visible=False, min_width=10
                                    )
                            with gr.Column():
                                self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_spec_desc = gr.Markdown("# Spec description")

        with gr.Tab(label="Add"):
            with gr.Row():
                with gr.Column(scale=2):
                    self.name = gr.Textbox(
                        label="Name",
                        info=(
                            "Must be unique and non-empty. "
                            "The name will be used to identify the embedding model."
                        ),
                    )
                    self.emb_choices = gr.Dropdown(
                        label="Vendors",
                        info=(
                            "Choose the vendor of the Embedding model. Each vendor "
                            "has different specification."
                        ),
                    )
                    self.spec = gr.Textbox(
                        label="Specification",
                        info="Specification of the Embedding model in YAML format.",
                    )
                    self.default = gr.Checkbox(
                        label="Set default",
                        info=(
                            "Set this Embedding model as default. This default "
                            "Embedding will be used by other components by default "
                            "if no Embedding is specified for such components."
                        ),
                    )
                    self.btn_new = gr.Button("Add", variant="primary")

                with gr.Column(scale=3):
                    self.spec_desc = gr.Markdown(self.spec_desc_default)

    def _on_app_created(self):
        """Called when the app is created"""
        self._app.app.load(
            self.list_embeddings,
            inputs=[],
            outputs=[self.emb_list],
        )
        self._app.app.load(
            lambda: gr.update(choices=list(embedding_models_manager.vendors().keys())),
            outputs=[self.emb_choices],
        )

    @staticmethod
    def _load_spec(spec_text):
        """Parse a YAML specification and make sure it is a mapping.

        Raises:
            ValueError: if the YAML does not describe a mapping (e.g. the
                textbox is empty or holds a bare scalar)
        """
        spec = yaml.load(spec_text, Loader=YAMLNoDateSafeLoader)
        if not isinstance(spec, dict):
            raise ValueError("Specification must be a YAML mapping of parameters")
        return spec

    def on_emb_vendor_change(self, vendor):
        """Return the spec template (YAML) and description of a vendor.

        The template holds the vendor's required parameters, each set to its
        declared default (or None when it has none).
        """
        vendor = embedding_models_manager.vendors()[vendor]

        required: dict = {}
        desc = vendor.describe()
        for key, value in desc["params"].items():
            if value.get("required", False):
                required[key] = value.get("default", None)

        return yaml.dump(required), format_description(vendor)

    def on_register_events(self):
        """Wire the UI components to their handlers."""
        self.emb_choices.select(
            self.on_emb_vendor_change,
            inputs=[self.emb_choices],
            outputs=[self.spec, self.spec_desc],
        )
        # After a successful creation: refresh the list, then reset the form.
        self.btn_new.click(
            self.create_emb,
            inputs=[self.name, self.emb_choices, self.spec, self.default],
            outputs=None,
        ).success(self.list_embeddings, inputs=[], outputs=[self.emb_list]).success(
            lambda: ("", None, "", False, self.spec_desc_default),
            outputs=[
                self.name,
                self.emb_choices,
                self.spec,
                self.default,
                self.spec_desc,
            ],
        )
        self.emb_list.select(
            self.select_emb,
            inputs=self.emb_list,
            outputs=[self.selected_emb_name],
            show_progress="hidden",
        )
        self.selected_emb_name.change(
            self.on_selected_emb_change,
            inputs=[self.selected_emb_name],
            outputs=[
                self._selected_panel,
                self._selected_panel_btn,
                # delete section
                self.btn_delete,
                self.btn_delete_yes,
                self.btn_delete_no,
                # edit section
                self.edit_name,
                self.edit_spec,
                self.edit_spec_desc,
                self.edit_default,
            ],
            show_progress="hidden",
        ).success(lambda: gr.update(value=""), outputs=[self.connection_logs])

        self.btn_delete.click(
            self.on_btn_delete_click,
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        self.btn_delete_yes.click(
            self.delete_emb,
            inputs=[self.selected_emb_name],
            outputs=[self.selected_emb_name],
            show_progress="hidden",
        ).then(
            self.list_embeddings,
            inputs=[],
            outputs=[self.emb_list],
        )
        self.btn_delete_no.click(
            lambda: (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ),
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        self.btn_edit_save.click(
            self.save_emb,
            inputs=[
                self.selected_emb_name,
                self.edit_name,
                self.edit_default,
                self.edit_spec,
            ],
            outputs=[self.selected_emb_name],
            show_progress="hidden",
        ).then(
            self.list_embeddings,
            inputs=[],
            outputs=[self.emb_list],
        )
        self.btn_close.click(
            lambda: "",
            outputs=[self.selected_emb_name],
        )

        self.btn_test_connection.click(
            self.check_connection,
            inputs=[self.selected_emb_name, self.edit_spec],
            outputs=[self.connection_logs],
        )

    def create_emb(self, name, choices, spec, default):
        """Create a model from the "Add" form.

        Args:
            name: name of the new model
            choices: selected vendor (key of `embedding_models_manager.vendors()`)
            spec: model specification in YAML
            default: whether the new model becomes the default one

        Raises:
            gr.Error: if the input is invalid or the model cannot be stored
        """
        try:
            name = name.strip()
            vendors = embedding_models_manager.vendors()
            if choices not in vendors:
                # Nothing selected in the dropdown; say so instead of
                # surfacing a bare KeyError.
                raise ValueError("Please select a vendor for the Embedding model")
            vendor = vendors[choices]

            spec = self._load_spec(spec)
            spec["__type__"] = vendor.__module__ + "." + vendor.__qualname__

            embedding_models_manager.add(name, spec=spec, default=default)
            gr.Info(f'Embedding model "{name}" created successfully')
        except ValueError as e:
            raise gr.Error(str(e))
        except Exception as e:
            raise gr.Error(f"Failed to create Embedding model '{name}': {e}")

    def list_embeddings(self):
        """List the Embedding models as a DataFrame (name, vendor, default).

        When no model is stored, a single placeholder row of "-" is returned.
        """
        items = []
        for item in embedding_models_manager.info().values():
            record = {}
            record["name"] = item["name"]
            # Only the class name is shown, not the full dotted path.
            record["vendor"] = item["spec"].get("__type__", "-").split(".")[-1]
            record["default"] = item["default"]
            items.append(record)

        if items:
            emb_list = pd.DataFrame.from_records(items)
        else:
            emb_list = pd.DataFrame.from_records(
                [{"name": "-", "vendor": "-", "default": "-"}]
            )

        return emb_list

    def select_emb(self, emb_list, ev: gr.SelectData):
        """Return the name of the model on the clicked row ("" when none)."""
        # The placeholder row shown when no model is stored.
        if ev.value == "-" and ev.index[0] == 0:
            gr.Info("No embedding model is loaded. Please add first")
            return ""

        if not ev.selected:
            return ""

        return emb_list["name"][ev.index[0]]

    def on_selected_emb_change(self, selected_emb_name):
        """Show or hide the edit panel and fill it for the selected model.

        Returns:
            tuple: updates for, in order, the panel, its button row, the
                delete / confirm / cancel buttons, and the edit fields (name,
                spec, spec description, default flag).
        """
        if selected_emb_name == "":
            _selected_panel = gr.update(visible=False)
            _selected_panel_btn = gr.update(visible=False)
            btn_delete = gr.update(visible=True)
            btn_delete_yes = gr.update(visible=False)
            btn_delete_no = gr.update(visible=False)
            edit_name = gr.update(value="")
            edit_spec = gr.update(value="")
            edit_spec_desc = gr.update(value="")
            edit_default = gr.update(value=False)
        else:
            _selected_panel = gr.update(visible=True)
            _selected_panel_btn = gr.update(visible=True)
            btn_delete = gr.update(visible=True)
            btn_delete_yes = gr.update(visible=False)
            btn_delete_no = gr.update(visible=False)

            # Work on a copy: popping "__type__" must not alter the manager's
            # cached record.
            info = deepcopy(embedding_models_manager.info()[selected_emb_name])
            vendor_str = info["spec"].pop("__type__", "-").split(".")[-1]
            vendor = embedding_models_manager.vendors()[vendor_str]

            edit_name = selected_emb_name
            edit_spec = yaml.dump(info["spec"])
            edit_spec_desc = format_description(vendor)
            edit_default = info["default"]

        return (
            _selected_panel,
            _selected_panel_btn,
            btn_delete,
            btn_delete_yes,
            btn_delete_no,
            edit_name,
            edit_spec,
            edit_spec_desc,
            edit_default,
        )

    def on_btn_delete_click(self):
        """Swap the "Delete" button for the confirm / cancel pair."""
        btn_delete = gr.update(visible=False)
        btn_delete_yes = gr.update(visible=True)
        btn_delete_no = gr.update(visible=True)

        return btn_delete, btn_delete_yes, btn_delete_no

    def check_connection(self, selected_emb_name, selected_spec):
        """Build the model from the edited spec and embed a test message.

        The stored spec is overlaid with the (possibly unsaved) content of the
        edit box, so changes can be tested before saving.

        Yields:
            str: the HTML log accumulated so far
        """
        log_content: str = ""
        try:
            log_content += f"- Testing model: {selected_emb_name}<br>"
            yield log_content

            # Parse content & init model
            info = deepcopy(embedding_models_manager.info()[selected_emb_name])

            # Parse content & create dummy embedding
            spec = self._load_spec(selected_spec)
            info["spec"].update(spec)

            emb = deserialize(info["spec"], safe=False)

            if emb is None:
                raise Exception(f"Can not found model: {selected_emb_name}")

            log_content += "- Sending a message `Hi`<br>"
            yield log_content
            _ = emb("Hi")

            log_content += (
                "<mark style='background: green; color: white'>- Connection success. "
                "</mark><br>"
            )
            yield log_content

            gr.Info(f"Embedding {selected_emb_name} connect successfully")
        except Exception as e:
            print(e)
            log_content += (
                f"<mark style='color: yellow; background: red'>- Connection failed. "
                f"Got error:\n {str(e)}</mark>"
            )
            yield log_content

        return log_content

    def save_emb(self, selected_emb_name, edit_name, default, spec):
        """Save the edited model, renaming it when the name was changed.

        Returns:
            str: the name under which the model is now stored

        Raises:
            gr.Error: if the input is invalid or the model cannot be saved
        """
        try:
            new_name = edit_name.strip()
            spec = self._load_spec(spec)
            # The vendor cannot be edited; carry it over from the stored spec.
            spec["__type__"] = embedding_models_manager.info()[selected_emb_name][
                "spec"
            ]["__type__"]
            embedding_models_manager.update(
                selected_emb_name, spec=spec, default=default, new_name=new_name
            )
            # An empty name box means "keep the current name" for the manager,
            # so report (and keep selected) the current name in that case.
            final_name = new_name or selected_emb_name
            gr.Info(f'Embedding model "{final_name}" saved successfully')
            return final_name
        except ValueError as e:
            raise gr.Error(str(e))
        except Exception as e:
            raise gr.Error(f'Failed to save Embedding model "{selected_emb_name}": {e}')

    def delete_emb(self, selected_emb_name):
        """Delete the selected model.

        Returns:
            str: "" to clear the selection after a successful deletion

        Raises:
            gr.Error: if the deletion fails; the selection is then left as is
        """
        try:
            embedding_models_manager.delete(selected_emb_name)
        except Exception as e:
            # gr.Error is an exception: it is only shown to the user when
            # raised, not when merely constructed.
            raise gr.Error(
                f'Failed to delete Embedding model "{selected_emb_name}": {e}'
            )

        return ""


================================================
FILE: libs/ktem/ktem/exceptions.py
================================================
class KHException(Exception):
    """Base class of the exceptions defined in this module."""

    pass


class HookNotDeclared(KHException):
    """Error for a hook that has not been declared.

    NOTE(review): the raising sites are outside this module; confirm the exact
    condition there.
    """

    pass


class HookAlreadyDeclared(KHException):
    """Error for a hook that has already been declared.

    NOTE(review): the raising sites are outside this module; confirm the exact
    condition there.
    """

    pass


================================================
FILE: libs/ktem/ktem/extension_protocol.py
================================================
import pluggy

# pluggy markers for the "ktem" project name: `hookspec` decorates hook
# specifications, `hookimpl` decorates implementations of those hooks.
hookspec = pluggy.HookspecMarker("ktem")
hookimpl = pluggy.HookimplMarker("ktem")


@hookspec
def ktem_declare_extensions() -> dict:  # type: ignore
    """Called before the run() function is executed.

    This is a hook specification only: the body is intentionally empty and
    the behavior is provided by the implementations of the hook.

    This hook is called without any arguments, and should return a dictionary.
    The dictionary has the following structure:

        ```
        {
            "id": str,      # cannot contain . or /
            "name": str,    # human-friendly name of the plugin
            "version": str,
            "support_host": str,
            "functionality": {
                "reasoning": {
                    id: {                         # cannot contain . or /
                        "name": str,
                        "callbacks": {},
                        "settings": {},
                    },
                },
                "index": {
                    "name": str,
                    "callbacks": {
                        "get_index_pipeline": callable,
                        "get_retrievers": {name: callable}
                    },
                    "settings": {},
                },
            },
        }
        ```

    Returns:
        dict: the extension declaration, structured as shown above
    """


================================================
FILE: libs/ktem/ktem/index/__init__.py
================================================
from .manager import IndexManager

# Public API of the package: only IndexManager is re-exported.
__all__ = ["IndexManager"]


================================================
FILE: libs/ktem/ktem/index/base.py
================================================
import abc
import logging
from typing import TYPE_CHECKING, Any, Optional

if TYPE_CHECKING:
    from ktem.app import BasePage

    from kotaemon.base import BaseComponent


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class BaseIndex(abc.ABC):
    """The base class for the index

    The index is responsible for storing information in a searchable manner, and
    retrieving that information.

    An application can have multiple indices. For example:
        - An index of files locally in the computer
        - An index of chat messages on Discord, Slack, etc.
        - An index of files stored on Google Drive, Dropbox, etc.
        - ...

    User can create, delete, and manage the indices in this application. They
    can create an index, set it to track a local folder in their computer, and
    then the chatbot can search for files in that folder. The user can create
    another index to track their chat messages on Discord. And so on.

    This class defines the interface for the index. It is concerned with:
        - Setting up the necessary software infrastructure for the index to work
        (e.g. database table, vector store collection, etc.).
        - Providing the UI for user interaction with the index, including settings.

    Only `get_indexing_pipeline` is abstract; every other method has a default
    implementation that does nothing or returns an empty value.

    Methods:

        __init__: initiate any resource definition required for the index to work
            (e.g. database table, vector store collection, etc.).
        on_create: called only once, when the user creates the index.
        on_delete: called only once, when the user deletes the index.
        on_start: called when the index starts.
        get_selector_component_ui: return the UI component to select the entities in
            the Chat page. Called in the ChatUI page.
        get_index_page_ui: return the index page UI to manage the entities. Called in
            the main application UI page.
        get_user_settings: return default user settings. Called only when the app starts
        get_admin_settings: return the admin settings. Called only when the user
            creates the index (for the admin to customize it). The output will be
            stored in the Index's config.
        get_indexing_pipeline: return the indexing pipeline when the entities are
            populated into the index
        get_retriever_pipelines: return the retriever pipelines when the user chat
    """

    def __init__(self, app, id, name, config):
        """Store the application handle and the identity of the index.

        Args:
            app: the application object, kept as `self._app`
            id: the id of the index
            name: the name of the index
            config: the config of the index (the admin settings)
        """
        self._app = app
        self.id = id
        self.name = name
        self.config = config  # admin settings

    def on_create(self):
        """Create the index for the first time"""

    def on_delete(self):
        """Trigger when the user deletes the index"""

    def on_start(self):
        """Trigger when the index starts

        Takes no argument: the id, name and config of the index are available
        as `self.id`, `self.name` and `self.config`.
        """

    def get_selector_component_ui(self) -> Optional["BasePage"]:
        """The UI component to select the entities in the Chat page

        Returns None by default.
        """
        return None

    def get_index_page_ui(self) -> Optional["BasePage"]:
        """The index page UI to manage the entities

        Returns None by default.
        """
        return None

    @classmethod
    def get_user_settings(cls) -> dict:
        """Return default user settings. These are the runtime settings.

        The settings will be populated in the user settings page. And will be used
        when initiating the indexing & retriever pipelines.

        Returns:
            dict: user settings in the dictionary format of
                `ktem.settings.SettingItem` (empty by default)
        """
        return {}

    @classmethod
    def get_admin_settings(cls) -> dict:
        """Return the default admin settings. These are the build-time settings.

        The settings will be populated in the admin settings page. And will be used
        when initiating the indexing & retriever pipelines.

        Returns:
            dict: admin settings in the dictionary format of
                `ktem.settings.SettingItem` (empty by default)
        """
        return {}

    @abc.abstractmethod
    def get_indexing_pipeline(
        self, settings: dict, user_id: Optional[int]
    ) -> "BaseComponent":
        """Return the indexing pipeline that populates the entities into the index

        Abstract: every concrete index must implement it.

        Args:
            settings: the user settings of the index
            user_id: the user id who is accessing the index
                TODO: instead of having a user_id, should have an app_state
                which might also contain the settings.

        Returns:
            BaseComponent: the indexing pipeline
        """
        ...

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseComponent"]:
        """Return the retriever pipelines to retrieve the entity from the index

        Args:
            settings: the user settings of the index
            user_id: the user id who is accessing the index
            selected: presumably the entities selected through the selector UI
                -- confirm against the implementations

        Returns:
            list[BaseComponent]: the retriever pipelines (empty by default)
        """
        return []


================================================
FILE: libs/ktem/ktem/index/file/__init__.py
================================================
from .index import FileIndex

# Public API of the package: only FileIndex is re-exported.
__all__ = ["FileIndex"]


================================================
FILE: libs/ktem/ktem/index/file/base.py
================================================
from pathlib import Path
from typing import Generator, Optional

from kotaemon.base import BaseComponent, Document, Param


class BaseFileIndexRetriever(BaseComponent):
    """Base class of the retrievers of a file index.

    Declares the resources a retriever is given as parameters (see the help
    text of each parameter below). Subclasses are expected to implement
    `get_pipeline`, which raises NotImplementedError here.
    """

    Source = Param(help="The SQLAlchemy Source table")
    Index = Param(help="The SQLAlchemy Index table")
    VS = Param(help="The VectorStore")
    DS = Param(help="The DocStore")
    FSPath = Param(help="The file storage path")
    user_id = Param(help="The user id")

    @classmethod
    def get_user_settings(cls) -> dict:
        """Get the user settings of this retriever

        Returns:
            dict: user settings in the dictionary format of
                `ktem.settings.SettingItem` (empty by default)
        """
        return {}

    @classmethod
    def get_pipeline(
        cls,
        user_settings: dict,
        index_settings: dict,
        selected: Optional[list] = None,
    ) -> "BaseFileIndexRetriever":
        """Build a retriever from the given settings.

        Not implemented in this base class.

        Args:
            user_settings: presumably the values of the settings declared by
                `get_user_settings` -- confirm against the callers
            index_settings: presumably the settings of the index the retriever
                belongs to -- confirm against the callers
            selected: presumably the entities the retrieval is restricted to
                -- confirm against the callers

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError


class BaseFileIndexIndexing(BaseComponent):
    """The pipeline to index information into the data store

    You should define the following method:
        - run(self, file_paths): run the indexing given the pipeline
        - get_pipeline(cls, user_settings, index_settings): return the
          fully-initialized pipeline, ready to be used by ktem.

    You will have access to the following resources:
        - self.Source: the source table
        - self.Index: the index table
        - self.VS: the vector store
        - self.DS: the docstore
        - self.FSPath: the file storage path
    """

    Source = Param(help="The SQLAlchemy Source table")
    Index = Param(help="The SQLAlchemy Index table")
    VS = Param(help="The VectorStore")
    DS = Param(help="The DocStore")
    FSPath = Param(help="The file storage path")
    user_id = Param(help="The user id")
    private = Param(False, help="Whether this is private index")
    chunk_size = Param(help="Chunk size for this index")
    chunk_overlap = Param(help="Chunk overlap for this index")

    def run(
        self, file_paths: str | Path | list[str | Path], *args, **kwargs
    ) -> tuple[list[str | None], list[str | None]]:
        """Run the indexing pipeline

        Args:
            file_paths (str | Path | list[str | Path]): the file paths to index

        Returns:
            - the indexed file ids (each file id corresponds to an input file path, or
                None if the indexing failed for that file path)
            - the error messages (each error message corresponds to an input file path,
                or None if the indexing was successful for that file path)

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def stream(
        self, file_paths: str | Path | list[str | Path], *args, **kwargs
    ) -> Generator[
        Document, None, tuple[list[str | None], list[str | None], list[Document]]
    ]:
        """Stream the indexing pipeline

        Args:
            file_paths (str | Path | list[str | Path]): the file paths to index

        Yields:
            Document: the output message to the UI, must have channel == index or debug

        Returns:
            - the indexed file ids (each file id corresponds to an input file path, or
                None if the indexing failed for that file path)
            - the error messages (each error message corresponds to an input file path,
                or None if the indexing was successful for that file path)
            - the indexed documents in form of list[Documents]

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    @classmethod
    def get_pipeline(
        cls, user_settings: dict, index_settings: dict
    ) -> "BaseFileIndexIndexing":
        """Build the fully-initialized indexing pipeline

        Args:
            user_settings: the user settings
            index_settings: the settings of the index

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    @classmethod
    def get_user_settings(cls) -> dict:
        """Get the user settings for indexing

        Returns:
            dict: user settings in the dictionary format of
                `ktem.settings.SettingItem`
        """
        return {}

    def copy_to_filestorage(
        self, file_paths: str | Path | list[str | Path]
    ) -> list[str]:
        """Copy to file storage and return the new path, relative to the file storage

        Each file is stored under the hex SHA-256 digest of its content, so the
        returned relative path is that digest.

        Args:
            file_paths: the file path (or list of file paths) to copy

        Returns:
            the new file paths, relative to the file storage
        """
        import shutil
        from hashlib import sha256

        if not isinstance(file_paths, list):
            file_paths = [file_paths]

        paths = []
        for file_path in file_paths:
            digest = sha256()
            with open(file_path, "rb") as f:
                # hash in fixed-size chunks so that a large upload is never
                # loaded into memory in one piece
                for chunk in iter(lambda: f.read(1024 * 1024), b""):
                    digest.update(chunk)
            paths.append(digest.hexdigest())
            shutil.copy(file_path, self.FSPath / paths[-1])

        return paths

    def get_filestorage_path(self, rel_paths: str | list[str]) -> list[str]:
        """Get the file storage path for the relative path

        Args:
            rel_paths: the relative path to the file storage

        Returns:
            the absolute file storage path to the file

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def warning(self, msg):
        """Log a warning message

        The base implementation simply prints the message to stdout.

        Args:
            msg: the message to log
        """
        print(msg)

    def rebuild_index(self):
        """Rebuild the index

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError


================================================
FILE: libs/ktem/ktem/index/file/exceptions.py
================================================
from ktem.exceptions import KHException


class FileExistsError(KHException):
    """Project-specific exception derived from `KHException`

    NOTE(review): this class has the same name as Python's builtin
    `FileExistsError`; inside any module that imports it, the builtin is shadowed.
    """

    pass


================================================
FILE: libs/ktem/ktem/index/file/graph/__init__.py
================================================
from .graph_index import GraphRAGIndex
from .light_graph_index import LightRAGIndex
from .nano_graph_index import NanoGraphRAGIndex

__all__ = ["GraphRAGIndex", "NanoGraphRAGIndex", "LightRAGIndex"]


================================================
FILE: libs/ktem/ktem/index/file/graph/graph_index.py
================================================
from typing import Any

from ktem.index.file import FileIndex

from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever
from .pipelines import GraphRAGIndexingPipeline, GraphRAGRetrieverPipeline


class GraphRAGIndex(FileIndex):
    """File index whose indexing and retrieval are delegated to GraphRAG pipelines"""

    def _setup_indexing_cls(self):
        """Register the GraphRAG indexing pipeline class"""
        self._indexing_pipeline_cls = GraphRAGIndexingPipeline

    def _setup_retriever_cls(self):
        """Register the GraphRAG retriever pipeline class (as a one-item list)"""
        self._retriever_pipeline_cls = [GraphRAGRetrieverPipeline]

    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
        """Return the parent's indexing pipeline with its vector store removed

        Args:
            settings: the user settings, forwarded to the parent implementation
            user_id: the user id, forwarded to the parent implementation

        Returns:
            BaseFileIndexIndexing: the indexing pipeline, with `VS` set to None
        """
        pipeline = super().get_indexing_pipeline(settings, user_id)
        # this kind of index does not use the vector store
        pipeline.VS = None
        return pipeline

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Return a single GraphRAG retriever restricted to the selected files

        Args:
            settings: the user settings (not used here)
            user_id: the user id (not used here)
            selected: the selection, resolved to file ids by the selector UI

        Returns:
            list[BaseFileIndexRetriever]: a list holding one retriever pipeline
        """
        selected_file_ids = self._selector_ui.get_selected_ids(selected)
        index_table = self._resources["Index"]
        return [
            GraphRAGRetrieverPipeline(file_ids=selected_file_ids, Index=index_table)
        ]


================================================
FILE: libs/ktem/ktem/index/file/graph/light_graph_index.py
================================================
from typing import Any, Optional
from uuid import uuid4

from ktem.db.engine import engine
from sqlalchemy.orm import Session

from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever
from .graph_index import GraphRAGIndex
from .lightrag_pipelines import LightRAGIndexingPipeline, LightRAGRetrieverPipeline


class LightRAGIndex(GraphRAGIndex):
    """Graph index variant backed by the LightRAG pipelines"""

    def __init__(self, app, id: int, name: str, config: dict):
        super().__init__(app, id, name, config)
        # resolved lazily by `_get_or_create_collection_graph_id`
        self._collection_graph_id: Optional[str] = None

    def _setup_indexing_cls(self):
        """Register the LightRAG indexing pipeline class"""
        self._indexing_pipeline_cls = LightRAGIndexingPipeline

    def _setup_retriever_cls(self):
        """Register the LightRAG retriever pipeline class (as a one-item list)"""
        self._retriever_pipeline_cls = [LightRAGRetrieverPipeline]

    def _get_or_create_collection_graph_id(self):
        """Return the graph id of this collection, resolving it on first use

        The first "graph" relation found in the Index table provides the id; when
        there is none, a fresh UUID string is generated. The value is cached on the
        instance.
        """
        if not self._collection_graph_id:
            index_table = self._resources["Index"]  # type: ignore
            with Session(engine) as session:
                row = (
                    session.query(index_table.target_id)
                    .filter(index_table.relation_type == "graph")
                    .first()
                )
                self._collection_graph_id = row[0] if row else str(uuid4())

        return self._collection_graph_id

    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
        """Return the indexing pipeline configured from this index's options

        Args:
            settings: the user settings; only the keys starting with
                `index.options.<index id>.` are used
            user_id: the user id, forwarded to the parent implementation

        Returns:
            BaseFileIndexIndexing: the pipeline with prompts, collection graph id
                and batch size set
        """
        pipeline = super().get_indexing_pipeline(settings, user_id)

        # keep only this index's options, with the common prefix removed
        option_prefix = f"index.options.{self.id}."
        index_options = {}
        for name, value in settings.items():
            if name.startswith(option_prefix):
                index_options[name[len(option_prefix) :]] = value

        pipeline.prompts = index_options
        pipeline.collection_graph_id = self._get_or_create_collection_graph_id()
        # fall back to the value already set on the pipeline when not configured
        pipeline.index_batch_size = index_options.get(
            "batch_size", pipeline.index_batch_size
        )
        return pipeline

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Return a single LightRAG retriever restricted to the selected files

        Args:
            settings: the user settings; `index.options.<index id>.search_type`
                selects the search mode and defaults to "local"
            user_id: the user id (not used here)
            selected: the selection, resolved to file ids by the selector UI

        Returns:
            list[BaseFileIndexRetriever]: a list holding one retriever pipeline
        """
        selected_file_ids = self._selector_ui.get_selected_ids(selected)
        search_type_key = f"index.options.{self.id}." + "search_type"
        mode = settings.get(search_type_key, "local")

        return [
            LightRAGRetrieverPipeline(
                file_ids=selected_file_ids,
                Index=self._resources["Index"],
                search_type=mode,
            )
        ]


================================================
FILE: libs/ktem/ktem/index/file/graph/lightrag_pipelines.py
================================================
import asyncio
import glob
import logging
import os
import re
from pathlib import Path
from typing import Generator

import numpy as np
import pandas as pd
from ktem.db.models import engine
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from sqlalchemy.orm import Session
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)
from theflow.settings import settings

from kotaemon.base import Document, Param, RetrievedDocument
from kotaemon.base.schema import AIMessage, HumanMessage, SystemMessage

from ..pipelines import BaseFileIndexRetriever
from .pipelines import GraphRAGIndexingPipeline
from .visualize import create_knowledge_graph, visualize_graph

# LightRAG is an optional dependency: when it is missing the module still imports,
# but the names below stay undefined and the pipelines fail when they are used.
try:
    from lightrag import LightRAG, QueryParam

    # newer versions of LightRAG needs to be initialized before using
    from lightrag.kg.shared_storage import initialize_pipeline_status
    from lightrag.operate import (
        _find_most_related_edges_from_entities,
        _find_most_related_text_unit_from_entities,
    )
    from lightrag.utils import EmbeddingFunc, compute_args_hash

except ImportError:
    print(
        (
            "LightRAG dependencies not installed. "
            "Try `pip install git+https://github.com/HKUDS/LightRAG.git` to install. "
            "LightRAG retriever pipeline will not work properly."
        )
    )


# Set the level of the "lightrag" logger to INFO.
logging.getLogger("lightrag").setLevel(logging.INFO)


# Root directory of the LightRAG data, located under the configured file storage
# path. It is created (with its parents) as a side effect of importing this module.
filestorage_path = Path(settings.KH_FILESTORAGE_PATH) / "lightrag"
filestorage_path.mkdir(parents=True, exist_ok=True)

# Default number of documents inserted per indexing batch.
INDEX_BATCHSIZE = 4


def get_llm_func(model):
    """Wrap a chat model into an async prompt-to-text function.

    Args:
        model: chat model exposing an async `ainvoke(messages)` method whose result
            has a `text` attribute.

    Returns:
        An async function `llm_func(prompt, system_prompt=None,
        history_messages=None, **kwargs) -> str`. When a `hashing_kv` keyword
        argument is supplied, it is used as a response cache through its
        `get_by_id` and `upsert` coroutines.
    """

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((Exception,)),
        after=lambda retry_state: logging.warning(
            f"LLM API call attempt {retry_state.attempt_number} failed. Retrying..."
        ),
    )
    async def _call_model(model, input_messages):
        return (await model.ainvoke(input_messages)).text

    async def llm_func(
        prompt, system_prompt=None, history_messages=None, **kwargs
    ) -> str:
        # `history_messages` defaults to None rather than a shared mutable list
        input_messages = [SystemMessage(text=system_prompt)] if system_prompt else []

        hashing_kv = kwargs.pop("hashing_kv", None)
        for msg in history_messages or []:
            # every role other than "user" is replayed as an AI message
            if msg.get("role") == "user":
                input_messages.append(HumanMessage(text=msg["content"]))
            else:
                input_messages.append(AIMessage(text=msg["content"]))

        input_messages.append(HumanMessage(text=prompt))

        if hashing_kv is not None:
            # NOTE(review): the cache key is built from the literal "model", so it
            # does not distinguish between different underlying models.
            args_hash = compute_args_hash("model", input_messages)
            if_cache_return = await hashing_kv.get_by_id(args_hash)
            if if_cache_return is not None:
                return if_cache_return["return"]

        try:
            output = await _call_model(model, input_messages)
        except Exception as e:
            logging.error(f"Failed to call LLM API after 3 retries: {str(e)}")
            raise

        print("-" * 50)
        print(output, "\n", "-" * 50)

        if hashing_kv is not None:
            await hashing_kv.upsert({args_hash: {"return": output, "model": "model"}})

        return output

    return llm_func


def get_embedding_func(model):
    """Adapt an embedding model to an async function returning a numpy array.

    Args:
        model: callable taking a list of texts and returning one item per text,
            each item exposing an `embedding` attribute.

    Returns:
        An async function mapping a list of texts to a numpy array built from the
        embeddings, in input order.
    """

    async def embedding_func(texts: list[str]) -> np.ndarray:
        # the model call itself is synchronous; only the wrapper is a coroutine
        vectors = [embedded.embedding for embedded in model(texts)]
        return np.array(vectors)

    return embedding_func


def get_default_models_wrapper():
    """Wrap the default embedding model and default LLM for the graph library.

    The embedding dimension is measured by embedding the probe text "Hi" once.

    Returns:
        A tuple `(llm_func, embedding_func, default_llm, default_embedding)`: the
        two wrapped callables followed by the underlying model objects.
    """
    embedding_model = embeddings.get_default()
    # probe the model once to discover the size of its vectors
    probe = embedding_model(["Hi"])
    default_embedding_dim = len(probe[0].embedding)
    wrapped_embedding = EmbeddingFunc(
        embedding_dim=default_embedding_dim,
        max_token_size=8192,
        func=get_embedding_func(embedding_model),
    )
    print("GraphRAG embedding dim", default_embedding_dim)

    llm_model = llms.get_default()
    wrapped_llm = get_llm_func(llm_model)

    return wrapped_llm, wrapped_embedding, llm_model, embedding_model


def prepare_graph_index_path(graph_id: str):
    """Compute the storage directories of a graph (nothing is created on disk).

    Args:
        graph_id: identifier of the graph.

    Returns:
        A tuple `(root_path, input_path)` where `root_path` is
        `filestorage_path / graph_id` and `input_path` is its `input` child.
    """
    graph_root = Path(filestorage_path) / graph_id
    return graph_root, graph_root / "input"


def list_of_list_to_df(data: list[list]) -> pd.DataFrame:
    """Build a DataFrame from rows whose first entry is the header row.

    Args:
        data: list of rows; `data[0]` holds the column names and the remaining
            rows hold the values.

    Returns:
        The DataFrame made of the value rows, labelled with the header.
    """
    rows = data[1:]
    header = data[0]
    return pd.DataFrame(rows, columns=header)


def clean_quote(input: str) -> str:
    """Return `input` with every single and double quote character removed."""
    quote_pattern = re.compile(r"[\"']")
    return quote_pattern.sub("", input)


async def lightrag_build_local_query_context(
    graph_func,
    query,
    query_param,
):
    """Collect the entities, relations and text chunks relevant to a local query.

    Args:
        graph_func: the LightRAG object; its `chunk_entity_relation_graph`,
            `entities_vdb` and `text_chunks` storages are read.
        query: the query text searched in the entity vector store.
        query_param: the query parameters; `top_k` bounds the entity search and the
            object is forwarded to the LightRAG lookup helpers.

    Returns:
        A tuple `(entities_df, relations_df, sources_df)` of pandas DataFrames.
        The relations and sources frames are empty (header only) when the
        corresponding LightRAG lookup fails.

    Raises:
        ValueError: if the entity vector store returns no result for the query.
    """
    knowledge_graph_inst = graph_func.chunk_entity_relation_graph
    entities_vdb = graph_func.entities_vdb
    text_chunks_db = graph_func.text_chunks

    results = await entities_vdb.query(query, top_k=query_param.top_k)
    if not len(results):
        raise ValueError("No results found")

    node_datas = await asyncio.gather(
        *[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
    )
    node_degrees = await asyncio.gather(
        *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
    )
    # drop the entities missing from the graph; the node degree is used as rank
    node_datas = [
        {**n, "entity_name": k["entity_name"], "rank": d}
        for k, n, d in zip(results, node_datas, node_degrees)
        if n is not None
    ]

    # Both lookups below are best-effort: a failure degrades the context instead
    # of aborting the query, but it is logged so that it does not go unnoticed.
    try:
        use_text_units = await _find_most_related_text_unit_from_entities(
            node_datas, query_param, text_chunks_db, knowledge_graph_inst
        )
    except Exception:
        logging.warning(
            "Failed to find the text units related to the entities; "
            "continuing without them.",
            exc_info=True,
        )
        use_text_units = []

    try:
        use_relations = await _find_most_related_edges_from_entities(
            node_datas, query_param, knowledge_graph_inst
        )
    except Exception:
        logging.warning(
            "Failed to find the relations related to the entities; "
            "continuing without them.",
            exc_info=True,
        )
        use_relations = []

    logging.info(
        f"Local query uses {len(node_datas)} entities, "
        f"{len(use_relations)} relations, {len(use_text_units)} text units"
    )

    # each section is a header row followed by one row per record
    entites_section_list = [["id", "entity", "type", "description", "rank"]]
    for i, n in enumerate(node_datas):
        entites_section_list.append(
            [
                str(i),
                clean_quote(n["entity_name"]),
                n.get("entity_type", "UNKNOWN"),
                clean_quote(n.get("description", "UNKNOWN")),
                n["rank"],
            ]
        )
    entities_df = list_of_list_to_df(entites_section_list)

    relations_section_list = [
        ["id", "source", "target", "description", "keywords", "weight", "rank"]
    ]
    for i, e in enumerate(use_relations):
        relations_section_list.append(
            [
                str(i),
                clean_quote(e["src_tgt"][0]),
                clean_quote(e["src_tgt"][1]),
                clean_quote(e["description"]),
                e["keywords"],
                e["weight"],
                e["rank"],
            ]
        )
    relations_df = list_of_list_to_df(relations_section_list)

    text_units_section_list = [["id", "content"]]
    for i, t in enumerate(use_text_units):
        text_units_section_list.append([str(i), t["content"]])
    sources_df = list_of_list_to_df(text_units_section_list)

    return entities_df, relations_df, sources_df


def build_graphrag(working_dir, llm_func, embedding_func):
    """Create a LightRAG object and run its asynchronous initialization steps.

    Args:
        working_dir: the working directory handed to LightRAG.
        llm_func: the LLM callable handed to LightRAG as `llm_model_func`.
        embedding_func: the embedding callable handed to LightRAG.

    Returns:
        The LightRAG object, after `initialize_storages()` and
        `initialize_pipeline_status()` have both completed.
    """
    rag = LightRAG(
        working_dir=working_dir,
        llm_model_func=llm_func,
        embedding_func=embedding_func,
    )

    # newer LightRAG versions must be initialized before use; each coroutine is
    # driven to completion by its own asyncio.run() call
    asyncio.run(rag.initialize_storages())
    asyncio.run(initialize_pipeline_status())

    return rag


class LightRAGIndexingPipeline(GraphRAGIndexingPipeline):
    """LightRAG specific indexing pipeline"""

    # prompt overrides, keyed by LightRAG prompt name
    prompts: dict[str, str] = {}
    # id of the graph shared by the collection (used with USE_GLOBAL_GRAPHRAG)
    collection_graph_id: str
    # number of documents merged into a single LightRAG insert call
    index_batch_size: int = INDEX_BATCHSIZE

    def store_file_id_with_graph_id(self, file_ids: list[str | None]):
        """Record which graph the given files belong to

        When `settings.USE_GLOBAL_GRAPHRAG` is falsy, this defers to the parent
        implementation. Otherwise every file is linked to the collection-wide
        graph id through a "graph" relation in the Index table, skipping the
        links that already exist.

        Args:
            file_ids: the file ids to record; falsy entries are skipped

        Returns:
            the graph id the files were recorded under
        """
        if not settings.USE_GLOBAL_GRAPHRAG:
            return super().store_file_id_with_graph_id(file_ids)

        # Use the collection-wide graph ID for LightRAG
        graph_id = self.collection_graph_id

        # Record all files under this graph_id
        with Session(engine) as session:
            for file_id in file_ids:
                if not file_id:
                    continue
                # Check if mapping already exists
                existing = (
                    session.query(self.Index)
                    .filter(
                        self.Index.source_id == file_id,
                        self.Index.target_id == graph_id,
                        self.Index.relation_type == "graph",
                    )
                    .first()
                )
                if not existing:
                    node = self.Index(
                        source_id=file_id,
                        target_id=graph_id,
                        relation_type="graph",
                    )
                    session.add(node)
            session.commit()

        return graph_id

    @classmethod
    def get_user_settings(cls) -> dict:
        """Get the user settings for indexing

        Exposes the index batch size plus one text setting per LightRAG string
        prompt whose name contains none of the blacklisted keywords.

        Returns:
            dict: the settings, or an empty dict when LightRAG cannot be imported
        """
        try:
            from lightrag.prompt import PROMPTS

            blacklist_keywords = ["default", "response", "process"]
            settings_dict = {
                "batch_size": {
                    "name": (
                        "Index batch size " "(reduce if you have rate limit issues)"
                    ),
                    "value": INDEX_BATCHSIZE,
                    "component": "number",
                }
            }
            settings_dict.update(
                {
                    prompt_name: {
                        "name": f"Prompt for '{prompt_name}'",
                        "value": content,
                        "component": "text",
                    }
                    for prompt_name, content in PROMPTS.items()
                    if all(
                        keyword not in prompt_name.lower()
                        for keyword in blacklist_keywords
                    )
                    and isinstance(content, str)
                }
            )
            return settings_dict
        except ImportError as e:
            print(e)
            return {}

    def call_graphrag_index(self, graph_id: str, docs: list[Document]):
        """Insert the documents into the LightRAG graph, yielding progress messages

        Args:
            graph_id: the id of the graph to create or update
            docs: the documents to index; only the non-empty ones whose metadata
                "type" is "text" (the default when absent) are inserted

        Yields:
            Document: progress messages on the "debug" channel
        """
        from lightrag.prompt import PROMPTS

        # modify the prompt if it is set in the settings
        # (this mutates LightRAG's module-level PROMPTS mapping)
        for prompt_name, content in self.prompts.items():
            if prompt_name in PROMPTS:
                PROMPTS[prompt_name] = content

        _, input_path = prepare_graph_index_path(graph_id)
        input_path.mkdir(parents=True, exist_ok=True)

        (
            llm_func,
            embedding_func,
            default_llm,
            default_embedding,
        ) = get_default_models_wrapper()
        print(
            f"Indexing GraphRAG with LLM {default_llm} "
            f"and Embedding {default_embedding}..."
        )

        all_docs = [
            doc.text
            for doc in docs
            if doc.metadata.get("type", "text") == "text" and len(doc.text.strip()) > 0
        ]

        yield Document(
            channel="debug",
            text="[GraphRAG] Creating/Updating index... This can take a long time.",
        )

        # Check if graph already exists
        graph_file = input_path / "graph_chunk_entity_relation.graphml"
        is_incremental = graph_file.exists()

        # Only clear cache if it's a new graph
        if not is_incremental:
            json_files = glob.glob(f"{input_path}/*.json")
            for json_file in json_files:
                os.remove(json_file)

        # Initialize or load existing GraphRAG
        graphrag_func = build_graphrag(
            input_path,
            llm_func=llm_func,
            embedding_func=embedding_func,
        )

        total_docs = len(all_docs)
        process_doc_count = 0
        yield Document(
            channel="debug",
            text=(
                f"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: "
                f"{process_doc_count} / {total_docs} documents."
            ),
        )

        # The batch size is a user-editable "number" setting, so it can arrive as
        # a float, a numeric string or a non-positive value, none of which is a
        # valid range() step. Coerce it to an int >= 1, else use the default.
        try:
            batch_size = max(1, int(self.index_batch_size))
        except (TypeError, ValueError):
            batch_size = INDEX_BATCHSIZE

        for doc_id in range(0, len(all_docs), batch_size):
            cur_docs = all_docs[doc_id : doc_id + batch_size]
            combined_doc = "\n".join(cur_docs)

            # Use insert for incremental updates
            graphrag_func.insert(combined_doc)
            process_doc_count += len(cur_docs)
            yield Document(
                channel="debug",
                text=(
                    f"[GraphRAG] {'Updated' if is_incremental else 'Indexed'} "
                    f"{process_doc_count} / {total_docs} documents."
                ),
            )

        yield Document(
            channel="debug",
            text=f"[GraphRAG] {'Update' if is_incremental else 'Indexing'} finished.",
        )

    def stream(
        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs
    ) -> Generator[
        Document, None, tuple[list[str | None], list[str | None], list[Document]]
    ]:
        """Stream the indexing pipeline by delegating to the parent implementation

        Args:
            file_paths: the file paths to index
            reindex: forwarded to the parent implementation

        Yields:
            Document: the messages produced by the parent implementation

        Returns:
            the file ids, the errors and the indexed documents returned by the
            parent implementation
        """
        file_ids, errors, all_docs = yield from super().stream(
            file_paths, reindex=reindex, **kwargs
        )

        return file_ids, errors, all_docs


class LightRAGRetrieverPipeline(BaseFileIndexRetriever):
    """Retriever pipeline that answers queries from a LightRAG graph"""

    Index = Param(help="The SQLAlchemy Index table")
    file_ids: list[str] = []
    search_type: str = "local"

    @classmethod
    def get_user_settings(cls) -> dict:
        """Get the user settings for retrieval: a dropdown for the search type"""
        search_type_setting = {
            "name": "Search type",
            "value": "local",
            "choices": ["local", "global", "hybrid"],
            "component": "dropdown",
            "info": "Whether to use local or global search in the graph.",
        }
        return {"search_type": search_type_setting}

    def _build_graph_search(self):
        """Load the graph of the first selected file and prepare the query params

        Returns:
            a tuple `(graphrag_func, query_params)`: the LightRAG object and the
            `QueryParam` for the configured search type (context only)
        """
        # only the first selected file is used to look the graph up
        file_id = self.file_ids[0]

        with Session(engine) as session:
            row = (
                session.query(self.Index.target_id)
                .filter(self.Index.source_id == file_id)
                .filter(self.Index.relation_type == "graph")
                .first()
            )
            graph_id = row[0] if row else None
            assert graph_id, f"GraphRAG index not found for file_id: {file_id}"

        _, graph_dir = prepare_graph_index_path(graph_id)
        graph_dir.mkdir(parents=True, exist_ok=True)

        llm_func, embedding_func, _, _ = get_default_models_wrapper()
        graphrag_func = build_graphrag(
            graph_dir,
            llm_func=llm_func,
            embedding_func=embedding_func,
        )
        print("search_type", self.search_type)

        return graphrag_func, QueryParam(
            mode=self.search_type, only_need_context=True
        )

    def _to_document(self, header: str, context_text: str) -> RetrievedDocument:
        """Wrap a context section into a table-typed document with score 1.0"""
        section_metadata = {
            "file_name": header,
            "type": "table",
            "llm_trulens_score": 1.0,
        }
        return RetrievedDocument(
            text=context_text,
            metadata=section_metadata,
            score=1.0,
        )

    def format_context_records(
        self, entities, relationships, sources
    ) -> list[RetrievedDocument]:
        """Turn the three context DataFrames into one document per section

        Args:
            entities: DataFrame with at least "entity" and "description" columns
            relationships: DataFrame with at least "source", "target" and
                "description" columns
            sources: DataFrame with "id" and "content" columns

        Returns:
            the entities, relationships and sources documents, in that order
        """
        entity_table = entities[["entity", "description"]].to_markdown(index=False)
        entity_doc = self._to_document("<b>Entities</b>\n", entity_table)

        relation_table = relationships[
            ["source", "target", "description"]
        ].to_markdown(index=False)
        relation_doc = self._to_document("\n<b>Relationships</b>\n", relation_table)

        # one titled HTML fragment per source chunk, concatenated in row order
        source_text = "".join(
            f"\n\n<h5>Source <b>#{row['id']}</b></h5>\n" + row["content"]
            for _, row in sources.iterrows()
        )
        source_doc = self._to_document("\n<b>Sources</b>\n", source_text)

        return [entity_doc, relation_doc, source_doc]

    def plot_graph(self, relationships):
        """Build the knowledge graph of the relationships and return its plot"""
        return visualize_graph(create_knowledge_graph(relationships))

    def run(
        self,
        text: str,
    ) -> list[RetrievedDocument]:
        """Retrieve the graph context of a query

        Args:
            text: the query text

        Returns:
            an empty list when no file is selected; in "local" mode, the three
            context documents followed by a plot document; otherwise a single
            document holding the context returned by LightRAG
        """
        if not self.file_ids:
            return []

        graphrag_func, query_params = self._build_graph_search()

        # graph visualization is only supported by the local mode
        if query_params.mode != "local":
            context = graphrag_func.query(text, query_params)

            # account for missing ``` for closing code block
            context += "\n```"

            return [
                RetrievedDocument(
                    text=context,
                    metadata={
                        "file_name": "GraphRAG {} Search".format(
                            query_params.mode.capitalize()
                        ),
                        "type": "table",
                    },
                )
            ]

        entities, relationships, sources = asyncio.run(
            lightrag_build_local_query_context(graphrag_func, text, query_params)
        )
        documents = self.format_context_records(entities, relationships, sources)
        plot = self.plot_graph(relationships)
        documents.append(
            RetrievedDocument(
                text="",
                metadata={
                    "file_name": "GraphRAG",
                    "type": "plot",
                    "data": plot,
                },
            )
        )
        return documents


================================================
FILE: libs/ktem/ktem/index/file/graph/nano_graph_index.py
================================================
from typing import Any, Optional
from uuid import uuid4

from ktem.db.engine import engine
from sqlalchemy.orm import Session

from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever
from .graph_index import GraphRAGIndex
from .nano_pipelines import NanoGraphRAGIndexingPipeline, NanoGraphRAGRetrieverPipeline


class NanoGraphRAGIndex(GraphRAGIndex):
    """Graph index variant backed by the Nano-GraphRAG pipelines"""

    def __init__(self, app, id: int, name: str, config: dict):
        super().__init__(app, id, name, config)
        # filled in on the first call to `_get_or_create_collection_graph_id`
        self._collection_graph_id: Optional[str] = None

    def _setup_indexing_cls(self):
        """Register the Nano-GraphRAG indexing pipeline class"""
        self._indexing_pipeline_cls = NanoGraphRAGIndexingPipeline

    def _setup_retriever_cls(self):
        """Register the Nano-GraphRAG retriever pipeline class (one-item list)"""
        self._retriever_pipeline_cls = [NanoGraphRAGRetrieverPipeline]

    def _get_or_create_collection_graph_id(self):
        """Return the cached graph id of this collection, resolving it if needed

        The id is read from the first "graph" relation of the Index table; a new
        UUID string is generated when no such relation exists.
        """
        cached_id = self._collection_graph_id
        if cached_id:
            return cached_id

        index_table = self._resources["Index"]  # type: ignore
        with Session(engine) as session:
            first_graph_row = (
                session.query(index_table.target_id)
                .filter(index_table.relation_type == "graph")
                .first()
            )
            if not first_graph_row:
                self._collection_graph_id = str(uuid4())
            else:
                self._collection_graph_id = first_graph_row[0]
        return self._collection_graph_id

    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
        """Return the indexing pipeline configured from this index's options

        Args:
            settings: the user settings; only the keys starting with
                `index.options.<index id>.` are used
            user_id: the user id, forwarded to the parent implementation

        Returns:
            BaseFileIndexIndexing: the pipeline with prompts, collection graph id
                and batch size set
        """
        pipeline = super().get_indexing_pipeline(settings, user_id)

        # select this index's options and strip their common prefix
        option_prefix = f"index.options.{self.id}."
        prefix_length = len(option_prefix)
        index_options = {}
        for name, value in settings.items():
            if not name.startswith(option_prefix):
                continue
            index_options[name[prefix_length:]] = value

        pipeline.prompts = index_options
        pipeline.collection_graph_id = self._get_or_create_collection_graph_id()
        # keep the pipeline's current batch size when the option is absent
        pipeline.index_batch_size = index_options.get(
            "batch_size", pipeline.index_batch_size
        )
        return pipeline

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Return a single Nano-GraphRAG retriever restricted to the selected files

        Args:
            settings: the user settings; `index.options.<index id>.search_type`
                selects the search mode and defaults to "local"
            user_id: the user id (not used here)
            selected: the selection, resolved to file ids by the selector UI

        Returns:
            list[BaseFileIndexRetriever]: a list holding one retriever pipeline
        """
        selected_file_ids = self._selector_ui.get_selected_ids(selected)
        option_prefix = f"index.options.{self.id}."
        mode = settings.get(option_prefix + "search_type", "local")

        retriever = NanoGraphRAGRetrieverPipeline(
            file_ids=selected_file_ids,
            Index=self._resources["Index"],
            search_type=mode,
        )
        return [retriever]


================================================
FILE: libs/ktem/ktem/index/file/graph/nano_pipelines.py
================================================
import asyncio
import glob
import logging
import os
import re
from pathlib import Path
from typing import Generator

import numpy as np
import pandas as pd
from ktem.db.models import engine
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from sqlalchemy.orm import Session
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)
from theflow.settings import settings

from kotaemon.base import Document, Param, RetrievedDocument
from kotaemon.base.schema import AIMessage, HumanMessage, SystemMessage

from ..pipelines import BaseFileIndexRetriever
from .pipelines import GraphRAGIndexingPipeline
from .visualize import create_knowledge_graph, visualize_graph

try:
    from nano_graphrag import GraphRAG, QueryParam
    from nano_graphrag._op import (
        _find_most_related_community_from_entities,
        _find_most_related_edges_from_entities,
        _find_most_related_text_unit_from_entities,
    )
    from nano_graphrag._utils import EmbeddingFunc, compute_args_hash

except ImportError:
    print(
        (
            "Nano-GraphRAG dependencies not installed. "
            "Try `pip install nano-graphrag` to install. "
            "Nano-GraphRAG retriever pipeline will not work properly."
        )
    )


# Set the level of the "nano-graphrag" logger to INFO.
logging.getLogger("nano-graphrag").setLevel(logging.INFO)


# Root directory of the Nano-GraphRAG data, located under the configured file
# storage path. It is created (with its parents) as a side effect of importing
# this module.
filestorage_path = Path(settings.KH_FILESTORAGE_PATH) / "nano_graphrag"
filestorage_path.mkdir(parents=True, exist_ok=True)

# Default index batch size for this module.
INDEX_BATCHSIZE = 4


def get_llm_func(model):
    """Wrap a chat model as an async completion function.

    Args:
        model: Chat model exposing an awaitable ``ainvoke(messages)`` whose
            result has a ``text`` attribute.

    Returns:
        An async callable
        ``llm_func(prompt, system_prompt=None, history_messages=None, **kwargs)``
        that returns the model's text answer. When a ``hashing_kv`` keyword
        argument is passed, it is used as a response cache: a stored answer
        is returned without calling the model, and fresh answers are written
        back to it.
    """

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((Exception,)),
        after=lambda retry_state: logging.warning(
            f"LLM API call attempt {retry_state.attempt_number} failed. Retrying..."
        ),
    )
    async def _call_model(model, input_messages):
        # Single model call; retried by the decorator above on any Exception.
        return (await model.ainvoke(input_messages)).text

    async def llm_func(
        prompt, system_prompt=None, history_messages=None, **kwargs
    ) -> str:
        # `history_messages` defaults to None rather than a shared mutable list.
        input_messages = [SystemMessage(text=system_prompt)] if system_prompt else []

        hashing_kv = kwargs.pop("hashing_kv", None)

        # Convert role/content dicts into message objects: "user" entries
        # become human messages, every other role becomes an AI message.
        for msg in history_messages or []:
            if msg.get("role") == "user":
                input_messages.append(HumanMessage(text=msg["content"]))
            else:
                input_messages.append(AIMessage(text=msg["content"]))

        input_messages.append(HumanMessage(text=prompt))

        if hashing_kv is not None:
            # Cache key is derived from the full message list.
            args_hash = compute_args_hash("model", input_messages)
            if_cache_return = await hashing_kv.get_by_id(args_hash)
            if if_cache_return is not None:
                return if_cache_return["return"]

        try:
            output = await _call_model(model, input_messages)
        except Exception as e:
            logging.error(f"Failed to call LLM API after 3 retries: {str(e)}")
            raise

        print("-" * 50)
        print(output, "\n", "-" * 50)

        if hashing_kv is not None:
            await hashing_kv.upsert({args_hash: {"return": output, "model": "model"}})

        return output

    return llm_func


def get_embedding_func(model):
    """Adapt an embedding model into an async function returning an array.

    Args:
        model: Callable that takes a list of texts and returns an iterable of
            objects carrying an ``embedding`` attribute.

    Returns:
        An async callable mapping ``list[str]`` to a ``numpy.ndarray`` built
        from the embeddings, one entry per returned object.
    """

    async def embedding_func(texts: list[str]) -> np.ndarray:
        vectors = []
        for embedded in model(texts):
            vectors.append(embedded.embedding)
        return np.array(vectors)

    return embedding_func


def get_default_models_wrapper():
    """Build the LLM and embedding callables from the default models.

    Returns:
        A tuple ``(llm_func, embedding_func, default_llm, default_embedding)``
        where the first two are the wrapped callables and the last two are
        the underlying default model objects.
    """
    embedder = embeddings.get_default()
    # Embed a short probe text to discover the embedding dimension.
    probe_output = embedder(["Hi"])
    dimension = len(probe_output[0].embedding)
    wrapped_embedder = EmbeddingFunc(
        embedding_dim=dimension,
        max_token_size=8192,
        func=get_embedding_func(embedder),
    )
    print("GraphRAG embedding dim", dimension)

    chat_model = llms.get_default()
    wrapped_llm = get_llm_func(chat_model)

    return wrapped_llm, wrapped_embedder, chat_model, embedder


def prepare_graph_index_path(graph_id: str):
    """Return ``(root_path, input_path)`` for the given graph id.

    ``root_path`` is ``filestorage_path / graph_id`` and ``input_path`` is its
    ``input`` sub-directory. Nothing is created on disk here.
    """
    graph_root = Path(filestorage_path).joinpath(graph_id)
    return graph_root, graph_root.joinpath("input")


def list_of_list_to_df(data: list[list]) -> pd.DataFrame:
    """Turn a list of rows into a DataFrame, using the first row as header."""
    header = data[0]
    rows = data[1:]
    return pd.DataFrame(rows, columns=header)


def clean_quote(input: str) -> str:
    """Return ``input`` with every single and double quote character removed."""
    quote_pattern = re.compile(r"[\"']")
    return quote_pattern.sub("", input)


async def nano_graph_rag_build_local_query_context(
    graph_func,
    query,
    query_param,
):
    """Collect the local-search context for ``query`` as four DataFrames.

    Args:
        graph_func: Object exposing ``chunk_entity_relation_graph``,
            ``entities_vdb``, ``community_reports`` and ``text_chunks``.
        query: Query text passed to the entity vector store.
        query_param: Query parameters; ``top_k`` is read here and the object
            is forwarded to the related-community/text-unit/edge helpers.

    Returns:
        ``(entities_df, relations_df, communities_df, sources_df)``.

    Raises:
        ValueError: If the entity vector store returns no results.
    """
    graph_store = graph_func.chunk_entity_relation_graph
    entity_index = graph_func.entities_vdb
    report_store = graph_func.community_reports
    chunk_store = graph_func.text_chunks

    hits = await entity_index.query(query, top_k=query_param.top_k)
    if len(hits) == 0:
        raise ValueError("No results found")

    entity_names = [hit["entity_name"] for hit in hits]
    fetched_nodes = await asyncio.gather(
        *(graph_store.get_node(name) for name in entity_names)
    )
    degrees = await asyncio.gather(
        *(graph_store.node_degree(name) for name in entity_names)
    )
    # Keep only the hits that resolve to a node; the node degree is its rank.
    node_datas = [
        {**node, "entity_name": name, "rank": degree}
        for name, node, degree in zip(entity_names, fetched_nodes, degrees)
        if node is not None
    ]

    use_communities = await _find_most_related_community_from_entities(
        node_datas, query_param, report_store
    )
    use_text_units = await _find_most_related_text_unit_from_entities(
        node_datas, query_param, chunk_store, graph_store
    )
    use_relations = await _find_most_related_edges_from_entities(
        node_datas, query_param, graph_store
    )

    entity_rows = [
        [
            str(position),
            clean_quote(node["entity_name"]),
            node.get("entity_type", "UNKNOWN"),
            clean_quote(node.get("description", "UNKNOWN")),
            node["rank"],
        ]
        for position, node in enumerate(node_datas)
    ]
    entities_df = list_of_list_to_df(
        [["id", "entity", "type", "description", "rank"], *entity_rows]
    )

    relation_rows = [
        [
            str(position),
            clean_quote(edge["src_tgt"][0]),
            clean_quote(edge["src_tgt"][1]),
            clean_quote(edge["description"]),
            edge["weight"],
            edge["rank"],
        ]
        for position, edge in enumerate(use_relations)
    ]
    relations_df = list_of_list_to_df(
        [["id", "source", "target", "description", "weight", "rank"], *relation_rows]
    )

    community_rows = [
        [str(position), community["report_string"]]
        for position, community in enumerate(use_communities)
    ]
    communities_df = list_of_list_to_df([["id", "content"], *community_rows])

    source_rows = [
        [str(position), unit["content"]]
        for position, unit in enumerate(use_text_units)
    ]
    sources_df = list_of_list_to_df([["id", "content"], *source_rows])

    return entities_df, relations_df, communities_df, sources_df


def build_graphrag(working_dir, llm_func, embedding_func):
    """Create a ``GraphRAG`` instance rooted at ``working_dir``.

    The same ``llm_func`` is used for both the "best" and the "cheap" model
    slots.
    """
    graphrag_options = dict(
        working_dir=working_dir,
        best_model_func=llm_func,
        cheap_model_func=llm_func,
        embedding_func=embedding_func,
    )
    return GraphRAG(**graphrag_options)


class NanoGraphRAGIndexingPipeline(GraphRAGIndexingPipeline):
    """Indexing pipeline that feeds document text into a nano-graphrag index."""

    # Prompt overrides keyed by nano-graphrag prompt name.
    prompts: dict[str, str] = {}
    # Graph id shared by every file when USE_GLOBAL_GRAPHRAG is enabled.
    collection_graph_id: str
    # Number of documents combined into one insert call.
    index_batch_size: int = INDEX_BATCHSIZE

    def store_file_id_with_graph_id(self, file_ids: list[str | None]):
        """Link the given files to a graph id in the Index table.

        Without ``settings.USE_GLOBAL_GRAPHRAG`` the parent implementation is
        used. Otherwise every non-empty file id is linked to
        ``self.collection_graph_id`` (skipping links that already exist) and
        that id is returned.
        """
        if not settings.USE_GLOBAL_GRAPHRAG:
            return super().store_file_id_with_graph_id(file_ids)

        # All files of the collection share one graph id.
        shared_graph_id = self.collection_graph_id

        with Session(engine) as session:
            for file_id in filter(None, file_ids):
                # Avoid inserting a duplicate file -> graph mapping.
                already_linked = (
                    session.query(self.Index)
                    .filter(
                        self.Index.source_id == file_id,
                        self.Index.target_id == shared_graph_id,
                        self.Index.relation_type == "graph",
                    )
                    .first()
                )
                if already_linked:
                    continue
                session.add(
                    self.Index(
                        source_id=file_id,
                        target_id=shared_graph_id,
                        relation_type="graph",
                    )
                )
            session.commit()

        return shared_graph_id

    @classmethod
    def get_user_settings(cls) -> dict:
        """Describe the user-tunable settings of this pipeline.

        Returns the batch-size setting plus one text setting per string prompt
        in nano-graphrag's ``PROMPTS`` whose name contains none of the
        blacklisted keywords. Returns ``{}`` when nano-graphrag cannot be
        imported.
        """
        try:
            from nano_graphrag.prompt import PROMPTS
        except ImportError as e:
            print(e)
            return {}

        blacklist_keywords = ["default", "response", "process"]
        user_settings = {
            "batch_size": {
                "name": (
                    "Index batch size " "(reduce if you have rate limit issues)"
                ),
                "value": INDEX_BATCHSIZE,
                "component": "number",
            }
        }
        for prompt_name, content in PROMPTS.items():
            is_blacklisted = any(
                keyword in prompt_name.lower() for keyword in blacklist_keywords
            )
            if not is_blacklisted and isinstance(content, str):
                user_settings[prompt_name] = {
                    "name": f"Prompt for '{prompt_name}'",
                    "value": content,
                    "component": "text",
                }
        return user_settings

    def call_graphrag_index(self, graph_id: str, docs: list[Document]):
        """Insert the text of ``docs`` into the graph stored for ``graph_id``.

        This is a generator: it yields progress ``Document`` objects on the
        "debug" channel while batches are inserted.
        """
        from nano_graphrag.prompt import PROMPTS

        # Apply prompt overrides, but only for names nano-graphrag knows.
        PROMPTS.update(
            {
                prompt_name: content
                for prompt_name, content in self.prompts.items()
                if prompt_name in PROMPTS
            }
        )

        _, input_path = prepare_graph_index_path(graph_id)
        input_path.mkdir(parents=True, exist_ok=True)

        models = get_default_models_wrapper()
        llm_func, embedding_func, default_llm, default_embedding = models
        print(
            f"Indexing GraphRAG with LLM {default_llm} "
            f"and Embedding {default_embedding}..."
        )

        # Only non-blank documents of type "text" are indexed.
        all_docs = [
            doc.text
            for doc in docs
            if doc.metadata.get("type", "text") == "text" and doc.text.strip()
        ]

        yield Document(
            channel="debug",
            text="[GraphRAG] Creating/Updating index... This can take a long time.",
        )

        # An existing graph file means this run extends an earlier index.
        graph_file = input_path / "graph_chunk_entity_relation.graphml"
        is_incremental = graph_file.exists()

        if not is_incremental:
            # New graph: drop JSON files left in the working directory.
            for json_file in glob.glob(f"{input_path}/*.json"):
                os.remove(json_file)

        graphrag_func = build_graphrag(
            input_path,
            llm_func=llm_func,
            embedding_func=embedding_func,
        )

        total_docs = len(all_docs)
        process_doc_count = 0
        yield Document(
            channel="debug",
            text=(
                f"[GraphRAG] {'Updating' if is_incremental else 'Creating'} index: "
                f"{process_doc_count} / {total_docs} documents."
            ),
        )

        for batch_start in range(0, total_docs, self.index_batch_size):
            batch = all_docs[batch_start : batch_start + self.index_batch_size]

            # Each batch is inserted as one newline-joined text.
            graphrag_func.insert("\n".join(batch))
            process_doc_count += len(batch)
            yield Document(
                channel="debug",
                text=(
                    f"[GraphRAG] {'Updated' if is_incremental else 'Indexed'} "
                    f"{process_doc_count} / {total_docs} documents."
                ),
            )

        yield Document(
            channel="debug",
            text=f"[GraphRAG] {'Update' if is_incremental else 'Indexing'} finished.",
        )

    def stream(
        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs
    ) -> Generator[
        Document, None, tuple[list[str | None], list[str | None], list[Document]]
    ]:
        """Delegate to the parent ``stream`` and return its result unchanged."""
        outcome = yield from super().stream(file_paths, reindex=reindex, **kwargs)
        file_ids, errors, all_docs = outcome

        return file_ids, errors, all_docs


class NanoGraphRAGRetrieverPipeline(BaseFileIndexRetriever):
    """Retriever that answers queries from a nano-graphrag index."""

    Index = Param(help="The SQLAlchemy Index table")
    file_ids: list[str] = []
    search_type: str = "local"

    @classmethod
    def get_user_settings(cls) -> dict:
        """Expose the search type (local or global) as a dropdown setting."""
        search_type_setting = {
            "name": "Search type",
            "value": "local",
            "choices": ["local", "global"],
            "component": "dropdown",
            "info": "Whether to use local or global search in the graph.",
        }
        return {"search_type": search_type_setting}

    def _build_graph_search(self):
        """Load the graph of the first selected file.

        Returns:
            ``(graphrag_func, query_params)`` where ``query_params`` requests
            context only, in the configured search mode.
        """
        file_id = self.file_ids[0]

        # Look up the graph id mapped to this file in the Index table.
        with Session(engine) as session:
            row = (
                session.query(self.Index.target_id)
                .filter(
                    self.Index.source_id == file_id,
                    self.Index.relation_type == "graph",
                )
                .first()
            )
            graph_id = row[0] if row else None
            assert graph_id, f"GraphRAG index not found for file_id: {file_id}"

        _, input_path = prepare_graph_index_path(graph_id)
        input_path.mkdir(parents=True, exist_ok=True)

        llm_func, embedding_func, _, _ = get_default_models_wrapper()
        graphrag_func = build_graphrag(
            input_path,
            llm_func=llm_func,
            embedding_func=embedding_func,
        )
        print("search_type", self.search_type)

        return graphrag_func, QueryParam(mode=self.search_type, only_need_context=True)

    def _to_document(self, header: str, context_text: str) -> RetrievedDocument:
        """Wrap a context section into a table-typed ``RetrievedDocument``."""
        section_metadata = {
            "file_name": header,
            "type": "table",
            "llm_trulens_score": 1.0,
        }
        return RetrievedDocument(
            text=context_text,
            metadata=section_metadata,
            score=1.0,
        )

    def format_context_records(
        self, entities, relationships, reports, sources
    ) -> list[RetrievedDocument]:
        """Render the four context DataFrames as four retrieved documents.

        Entities and relationships become markdown tables; reports and sources
        become concatenated HTML-headed sections, one per row.
        """
        entity_table = entities[["entity", "description"]].to_markdown(index=False)
        docs = [self._to_document("<b>Entities</b>\n", entity_table)]

        relation_table = relationships[
            ["source", "target", "description"]
        ].to_markdown(index=False)
        docs.append(self._to_document("\n<b>Relationships</b>\n", relation_table))

        # The report rows carry no title, so the row id is shown instead.
        report_parts = []
        for _, row in reports.iterrows():
            title, content = row["id"], row["content"]
            report_parts.append(f"\n\n<h5>Report <b>{title}</b></h5>\n" + content)
        docs.append(self._to_document("\n<b>Reports</b>\n", "".join(report_parts)))

        source_parts = []
        for _, row in sources.iterrows():
            title, content = row["id"], row["content"]
            source_parts.append(f"\n\n<h5>Source <b>#{title}</b></h5>\n" + content)
        docs.append(self._to_document("\n<b>Sources</b>\n", "".join(source_parts)))

        return docs

    def plot_graph(self, relationships):
        """Build and render a knowledge graph from the relationships table."""
        return visualize_graph(create_knowledge_graph(relationships))

    def run(
        self,
        text: str,
    ) -> list[RetrievedDocument]:
        """Retrieve graph context for ``text``.

        Returns an empty list when no file is selected. In "local" mode the
        result is the four context documents plus a plot document; in any
        other mode it is a single document holding the query output.
        """
        if not self.file_ids:
            return []

        graphrag_func, query_params = self._build_graph_search()

        if query_params.mode != "local":
            context = graphrag_func.query(text, query_params)
            return [
                RetrievedDocument(
                    text=context,
                    metadata={
                        "file_name": "GraphRAG {} Search".format(
                            query_params.mode.capitalize()
                        ),
                        "type": "table",
                    },
                )
            ]

        # Only the local mode supports graph visualization.
        entities, relationships, reports, sources = asyncio.run(
            nano_graph_rag_build_local_query_context(graphrag_func, text, query_params)
        )

        documents = self.format_context_records(
            entities, relationships, reports, sources
        )
        plot = self.plot_graph(relationships)
        documents.append(
            RetrievedDocument(
                text="",
                metadata={
                    "file_name": "GraphRAG",
                    "type": "plot",
                    "data": plot,
                },
            )
        )

        return documents


================================================
FILE: libs/ktem/ktem/index/file/graph/pipelines.py
================================================
import os
import shutil
import subprocess
from pathlib import Path
from shutil import rmtree
from typing import Generator
from uuid import uuid4

import pandas as pd
import tiktoken
import yaml
from decouple import config
from ktem.db.models import engine
from sqlalchemy.orm import Session
from theflow.settings import settings

from kotaemon.base import Document, Param, RetrievedDocument

from ..pipelines import BaseFileIndexRetriever, IndexDocumentPipeline, IndexPipeline
from .visualize import create_knowledge_graph, visualize_graph

try:
    from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
    from graphrag.query.indexer_adapters import (
        read_indexer_entities,
        read_indexer_relationships,
        read_indexer_reports,
        read_indexer_text_units,
    )
    from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
    from graphrag.query.llm.oai.embedding import OpenAIEmbedding
    from graphrag.query.llm.oai.typing import OpenaiApiType
    from graphrag.query.structured_search.local_search.mixed_context import (
        LocalSearchMixedContext,
    )
    from graphrag.vector_stores.lancedb import LanceDBVectorStore
except ImportError:
    print(
        (
            "GraphRAG dependencies not installed. "
            "Try `pip install graphrag future` to install. "
            "GraphRAG retriever pipeline will not work properly."
        )
    )


# Directory (under KH_FILESTORAGE_PATH) that holds the per-graph working
# directories of this module; it is created as a side effect of importing it.
filestorage_path = Path(settings.KH_FILESTORAGE_PATH) / "graphrag"
filestorage_path.mkdir(parents=True, exist_ok=True)

# Message used when the GRAPHRAG_API_KEY environment variable is not set.
GRAPHRAG_KEY_MISSING_MESSAGE = (
    "GRAPHRAG_API_KEY is not set. Please set it to use the GraphRAG retriever pipeline."
)


def check_graphrag_api_key():
    """Return True when the GRAPHRAG_API_KEY environment variable is non-empty."""
    api_key = os.getenv("GRAPHRAG_API_KEY", "")
    return bool(api_key)


def prepare_graph_index_path(graph_id: str):
    """Return ``(root_path, input_path)`` for the given graph id.

    ``root_path`` is ``filestorage_path / graph_id`` and ``input_path`` is its
    ``input`` sub-directory. Nothing is created on disk here.
    """
    graph_root = Path(filestorage_path).joinpath(graph_id)
    return graph_root, graph_root.joinpath("input")


class GraphRAGIndexingPipeline(IndexDocumentPipeline):
    """GraphRAG specific indexing pipeline.

    After the parent pipeline has indexed the files, they are linked to a new
    graph id and the ``graphrag.index`` command-line module is run over the
    text of the indexed documents.
    """

    def route(self, file_path: str | Path) -> IndexPipeline:
        """Simply disable the splitter (chunking) for this pipeline"""
        pipeline = super().route(file_path)
        pipeline.splitter = None

        return pipeline

    def store_file_id_with_graph_id(self, file_ids: list[str | None]):
        """Create a new graph id and link every non-empty file id to it.

        Args:
            file_ids: File ids to link; falsy entries are skipped.

        Returns:
            The newly generated graph id (a UUID4 string).
        """
        graph_id = str(uuid4())
        with Session(engine) as session:
            nodes = [
                self.Index(
                    source_id=file_id,
                    target_id=graph_id,
                    relation_type="graph",
                )
                for file_id in file_ids
                if file_id
            ]

            session.add_all(nodes)
            session.commit()

        return graph_id

    def write_docs_to_files(self, graph_id: str, docs: list[Document]):
        """Write each text document to ``<input_path>/<doc_id>.txt``.

        Args:
            graph_id: Graph id whose working directory receives the files.
            docs: Documents to write; only those whose metadata type is
                "text" (the default when unset) are written.

        Returns:
            The root path of the graph working directory.
        """
        root_path, input_path = prepare_graph_index_path(graph_id)
        input_path.mkdir(parents=True, exist_ok=True)

        for doc in docs:
            if doc.metadata.get("type", "text") == "text":
                # Write as UTF-8 explicitly instead of relying on the
                # platform's default encoding.
                with open(
                    input_path / f"{doc.doc_id}.txt", "w", encoding="utf-8"
                ) as f:
                    f.write(doc.text)

        return root_path

    def call_graphrag_index(self, graph_id: str, all_docs: list[Document]):
        """Run the GraphRAG indexing command for ``graph_id``.

        This is a generator yielding "debug" channel ``Document`` objects: a
        start notice, then one document per stdout line of the indexing
        subprocess.

        Raises:
            ValueError: If GRAPHRAG_API_KEY is not set.
        """
        if not check_graphrag_api_key():
            raise ValueError(GRAPHRAG_KEY_MISSING_MESSAGE)

        # Dump the documents to disk; the returned path is the graph root
        # directory that is handed to the command via --root.
        root_path = str(self.write_docs_to_files(graph_id, all_docs).absolute())

        # Construct the command
        command = [
            "python",
            "-m",
            "graphrag.index",
            "--root",
            root_path,
            "--reporter",
            "rich",
            "--init",
        ]

        # First run: with the --init flag.
        yield Document(
            channel="debug",
            text="[GraphRAG] Creating index... This can take a long time.",
        )
        result = subprocess.run(command, capture_output=True, text=True)
        print(result.stdout)
        # Drop the trailing "--init" flag for the actual indexing run.
        command = command[:-1]

        # copy customized GraphRAG config file if it exists
        if config("USE_CUSTOMIZED_GRAPHRAG_SETTING", default="value").lower() == "true":
            setting_file_path = os.path.join(os.getcwd(), "settings.yaml.example")
            destination_file_path = os.path.join(root_path, "settings.yaml")
            try:
                shutil.copy(setting_file_path, destination_file_path)
            except OSError:
                # shutil.copy reports a missing or unreadable source file with
                # plain OSError subclasses; shutil.Error is itself an OSError,
                # so this covers both.
                print("failed to copy customized GraphRAG config file. ")

        # Run the command and stream stdout
        with subprocess.Popen(command, stdout=subprocess.PIPE, text=True) as process:
            if process.stdout:
                for line in process.stdout:
                    yield Document(channel="debug", text=line)

    def stream(
        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs
    ) -> Generator[
        Document, None, tuple[list[str | None], list[str | None], list[Document]]
    ]:
        """Index the files, then build the graph index over their documents.

        Yields the documents of the parent ``stream`` followed by those of
        ``call_graphrag_index``.

        Returns:
            ``(file_ids, errors, all_docs)`` as produced by the parent
            ``stream``.
        """
        file_ids, errors, all_docs = yield from super().stream(
            file_paths, reindex=reindex, **kwargs
        )

        # assign graph_id to file_ids
        graph_id = self.store_file_id_with_graph_id(file_ids)
        # call GraphRAG index with docs and graph_id
        yield from self.call_graphrag_index(graph_id, all_docs)

        return file_ids, errors, all_docs


class GraphRAGRetrieverPipeline(BaseFileIndexRetriever):
    """GraphRAG specific retriever pipeline"""

    Index = Param(help="The SQLAlchemy Index table")
    file_ids: list[str] = []

    @classmethod
    def get_user_settings(cls) -> dict:
        """Expose the search type as a dropdown setting ("local" only)."""
        return {
            "search_type": {
                "name": "Search type",
                "value": "local",
                "choices": ["local"],
                "component": "dropdown",
                "info": "Whether to use local or global search in the graph.",
            }
        }

    def _build_graph_search(self):
        """Load the indexed graph of the selected file into a context builder.

        Reads the parquet outputs under ``<root_path>/output``, rebuilds the
        entity description vector store, and configures the text embedder.

        Returns:
            A ``LocalSearchMixedContext`` ready to build query contexts.
        """
        assert (
            len(self.file_ids) <= 1
        ), "GraphRAG retriever only supports one file_id at a time"

        file_id = self.file_ids[0]
        # retrieve the graph_id from the index
        with Session(engine) as session:
            graph_id = (
                session.query(self.Index.target_id)
                .filter(self.Index.source_id == file_id)
                .filter(self.Index.relation_type == "graph")
                .first()
            )
            graph_id = graph_id[0] if graph_id else None
            assert graph_id, f"GraphRAG index not found for file_id: {file_id}"

        root_path, _ = prepare_graph_index_path(graph_id)
        output_path = root_path / "output"

        INPUT_DIR = output_path
        LANCEDB_URI = str(INPUT_DIR / "lancedb")
        COMMUNITY_REPORT_TABLE = "create_final_community_reports"
        ENTITY_TABLE = "create_final_nodes"
        ENTITY_EMBEDDING_TABLE = "create_final_entities"
        RELATIONSHIP_TABLE = "create_final_relationships"
        TEXT_UNIT_TABLE = "create_final_text_units"
        COMMUNITY_LEVEL = 2

        # read nodes table to get community and degree data
        entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
        entity_embedding_df = pd.read_parquet(
            f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"
        )
        entities = read_indexer_entities(
            entity_df, entity_embedding_df, COMMUNITY_LEVEL
        )

        # load description embeddings to an in-memory lancedb vectorstore
        # to connect to a remote db, specify url and port values.
        description_embedding_store = LanceDBVectorStore(
            collection_name="entity_description_embeddings",
        )
        description_embedding_store.connect(db_uri=LANCEDB_URI)
        # Remove any existing store directory before the embeddings are
        # stored again below.
        if Path(LANCEDB_URI).is_dir():
            rmtree(LANCEDB_URI)
        _ = store_entity_semantic_embeddings(
            entities=entities, vectorstore=description_embedding_store
        )
        print(f"Entity count: {len(entity_df)}")

        # Read relationships
        relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
        relationships = read_indexer_relationships(relationship_df)

        # Read community reports
        report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
        reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

        # Read text units
        text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
        text_units = read_indexer_text_units(text_unit_df)

        # initialize default settings
        embedding_model = os.getenv(
            "GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small"
        )
        embedding_api_key = os.getenv("GRAPHRAG_API_KEY")
        embedding_api_base = None

        # use customized GraphRAG settings if the flag is set
        if config("USE_CUSTOMIZED_GRAPHRAG_SETTING", default="value").lower() == "true":
            settings_yaml_path = Path(root_path) / "settings.yaml"
            with open(settings_yaml_path, "r") as f:
                custom_settings = yaml.safe_load(f) or {}
            # Every key below is optional: a missing or empty entry keeps the
            # default chosen above instead of raising KeyError.
            embedding_llm = (custom_settings.get("embeddings") or {}).get("llm") or {}
            embedding_model = embedding_llm.get("model") or embedding_model
            embedding_api_key = embedding_llm.get("api_key") or embedding_api_key
            embedding_api_base = embedding_llm.get("api_base") or embedding_api_base

        text_embedder = OpenAIEmbedding(
            api_key=embedding_api_key,
            api_base=embedding_api_base,
            api_type=OpenaiApiType.OpenAI,
            model=embedding_model,
            deployment_name=embedding_model,
            max_retries=20,
        )
        token_encoder = tiktoken.get_encoding("cl100k_base")

        context_builder = LocalSearchMixedContext(
            community_reports=reports,
            text_units=text_units,
            entities=entities,
            relationships=relationships,
            covariates=None,
            entity_text_embeddings=description_embedding_store,
            embedding_vectorstore_key=EntityVectorStoreKey.ID,
            # if the vectorstore uses entity title as ids,
            # set this to EntityVectorStoreKey.TITLE
            text_embedder=text_embedder,
            token_encoder=token_encoder,
        )
        return context_builder

    def _to_document(self, header: str, context_text: str) -> RetrievedDocument:
        """Wrap a context section into a table-typed ``RetrievedDocument``."""
        return RetrievedDocument(
            text=context_text,
            metadata={
                "file_name": header,
                "type": "table",
                "llm_trulens_score": 1.0,
            },
            score=1.0,
        )

    def format_context_records(self, context_records) -> list[RetrievedDocument]:
        """Render the context records as four retrieved documents.

        Args:
            context_records: Mapping with "entities", "relationships",
                "reports" and "sources" entries; the code indexes them by
                column and iterates rows, so DataFrames are expected.

        Returns:
            Documents for entities, relationships, reports and sources, in
            that order.
        """
        entities = context_records.get("entities", [])
        relationships = context_records.get("relationships", [])
        reports = context_records.get("reports", [])
        sources = context_records.get("sources", [])

        docs = []

        context: str = ""

        header = "<b>Entities</b>\n"
        context = entities[["entity", "description"]].to_markdown(index=False)
        docs.append(self._to_document(header, context))

        header = "\n<b>Relationships</b>\n"
        context = relationships[["source", "target", "description"]].to_markdown(
            index=False
        )
        docs.append(self._to_document(header, context))

        header = "\n<b>Reports</b>\n"
        context = ""
        for idx, row in reports.iterrows():
            title, content = row["title"], row["content"]
            context += f"\n\n<h5>Report <b>{title}</b></h5>\n"
            context += content
        docs.append(self._to_document(header, context))

        header = "\n<b>Sources</b>\n"
        context = ""
        for idx, row in sources.iterrows():
            title, content = row["id"], row["text"]
            context += f"\n\n<h5>Source <b>#{title}</b></h5>\n"
            context += content
        docs.append(self._to_document(header, context))

        return docs

    def plot_graph(self, context_records):
        """Build and render a knowledge graph from the "relationships" records."""
        relationships = context_records.get("relationships", [])
        G = create_knowledge_graph(relationships)
        plot = visualize_graph(G)
        return plot

    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):
        """Return ``documents`` unchanged (no relevance scoring is applied)."""
        return documents

    def run(
        self,
        text: str,
    ) -> list[RetrievedDocument]:
        """Retrieve local-search graph context for ``text``.

        Returns:
            An empty list when no file is selected; otherwise the formatted
            context documents followed by one plot document.

        Raises:
            ValueError: If GRAPHRAG_API_KEY is not set.
        """
        if not self.file_ids:
            return []

        if not check_graphrag_api_key():
            raise ValueError(GRAPHRAG_KEY_MISSING_MESSAGE)

        context_builder = self._build_graph_search()

        local_context_params = {
            "text_unit_prop": 0.5,
            "community_prop": 0.1,
            "conversation_history_max_turns": 5,
            "conversation_history_user_turns_only": True,
            "top_k_mapped_entities": 10,
            "top_k_relationships": 10,
            "include_entity_rank": False,
            "include_relationship_weight": False,
            "include_community_rank": False,
            "return_candidate_context": False,
            "embedding_vectorstore_key": EntityVectorStoreKey.ID,
            # set this to EntityVectorStoreKey.TITLE
            # if the vectorstore uses entity title as ids
            "max_tokens": 12_000,
            # change this based on the token limit you have on your model
            # (if you are using a model with 8k limit, a good setting could be 5000)
        }

        context_text, context_records = context_builder.build_context(
            query=text,
            conversation_history=None,
            **local_context_params,
        )
        documents = self.format_context_records(context_records)
        plot = self.plot_graph(context_records)

        return documents + [
            RetrievedDocument(
                text="",
                metadata={
                    "file_name": "GraphRAG",
                    "type": "plot",
                    "data": plot,
                },
            ),
        ]


================================================
FILE: libs/ktem/ktem/index/file/graph/visualize.py
================================================
import networkx as nx
import plotly.graph_objects as go
from plotly.io import to_json


def create_knowledge_graph(df):
    """Build an undirected networkx graph from a relations DataFrame.

    Each row adds one edge between its "source" and "target" values; every
    other column of the row is stored as an attribute of that edge.
    """
    endpoint_columns = ("source", "target")
    graph = nx.Graph()
    for _, record in df.iterrows():
        edge_attributes = {
            column: value
            for column, value in record.items()
            if column not in endpoint_columns
        }
        graph.add_edge(record["source"], record["target"], **edge_attributes)

    return graph


def visualize_graph(G):
    """Render a networkx graph as a Plotly figure and return it as JSON.

    Nodes are positioned with a 2D spring layout. Edges are drawn as thin
    grey line segments whose hover text is the edge's "description"
    attribute (empty when the edge has none). Nodes are drawn as labelled
    markers whose size and colour depend on their degree.

    Args:
        G: the networkx graph to draw.

    Returns:
        The figure serialized by `plotly.io.to_json`.
    """
    pos = nx.spring_layout(G, dim=2)

    # Each edge is emitted as three points (start, end, None); the None entry
    # breaks the line so that consecutive edges are not joined together.
    edge_x = []
    edge_y = []
    edge_hover_texts = []
    edge_descriptions = nx.get_edge_attributes(G, "description")
    for edge in G.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))
        # Hover texts are matched to points by position, so the description is
        # repeated once per emitted point to stay aligned with its own edge.
        # Edges without a "description" attribute get an empty hover text
        # instead of raising KeyError.
        description = edge_descriptions.get(edge, "")
        edge_hover_texts.extend((description, description, description))

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        text=edge_hover_texts,
        line=dict(width=0.5, color="#888"),
        hoverinfo="text",
        mode="lines",
    )

    node_x = []
    node_y = []
    node_degrees = []
    node_text = []
    node_size = []
    for node, neighbours in G.adjacency():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        degree = len(neighbours)
        node_degrees.append(degree)
        node_text.append(node)
        # three marker sizes: small (< 5 links), medium (< 10 links), large
        node_size.append(15 if degree < 5 else (30 if degree < 10 else 60))

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        textfont=dict(
            family="Courier New, monospace",
            size=10,  # Set the font size here
        ),
        textposition="top center",
        mode="markers+text",
        hoverinfo="text",
        text=node_text,
        marker=dict(
            showscale=True,
            # colorscale options
            size=node_size,
            colorscale="YlGnBu",
            reversescale=True,
            color=node_degrees,
            # NOTE(review): `titleside` is a legacy colorbar attribute; newer
            # plotly releases may expect `title=dict(side=...)` instead --
            # confirm against the plotly version pinned by the project.
            colorbar=dict(
                thickness=5,
                xanchor="left",
                titleside="right",
            ),
            line_width=2,
        ),
    )

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )
    fig.update_layout(autosize=True)

    return to_json(fig)


================================================
FILE: libs/ktem/ktem/index/file/index.py
================================================
import uuid
from datetime import datetime
from typing import Any, Optional, Type

from ktem.components import filestorage_path, get_docstore, get_vectorstore
from ktem.db.engine import engine
from ktem.index.base import BaseIndex
from sqlalchemy import JSON, Column, DateTime, Integer, String, UniqueConstraint
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.mutable import MutableDict
from theflow.settings import settings as flowsettings
from theflow.utils.modules import import_dotted_string
from tzlocal import get_localzone

from kotaemon.storages import BaseDocumentStore, BaseVectorStore

from .base import BaseFileIndexIndexing, BaseFileIndexRetriever


def generate_uuid():
    """Return a freshly generated random UUID (uuid4) in its string form."""
    return f"{uuid.uuid4()}"

class FileIndex(BaseIndex):
    """
    File index to store and allow retrieval of files

    The file index stores files in a local folder and index them for retrieval.
    This file index provides the following infrastructure to support the indexing:
        - SQL table Source: store the list of files that are indexed by the system
        - Vector store: contain the embedding of segments of the files
        - Document store: contain the text of segments of the files. Each text stored
        in this document store is associated with a vector in the vector store.
        - SQL table Index: store the relationship between (1) the source and the
        docstore, and (2) the source and the vector store.
        - SQL table FileGroup: store named groups of files
    """

    def __init__(self, app, id: int, name: str, config: dict):
        super().__init__(app, id, name, config)

        # The classes below are resolved later by the `_setup_*_cls` methods
        self._indexing_pipeline_cls: Type[BaseFileIndexIndexing]
        self._retriever_pipeline_cls: list[Type[BaseFileIndexRetriever]]
        self._selector_ui_cls: Type
        self._selector_ui: Any = None
        self._index_ui_cls: Type
        self._index_ui: Any = None

        self._default_settings: dict[str, dict] = {}
        self._setting_mappings: dict[str, dict] = {}

    def _setup_resources(self):
        """Setup resources for the file index

        The resources include:
            - Database table
            - Vector store
            - Document store
            - File storage path
        """
        Base = declarative_base()
        is_private = self.config.get("private", False)

        def current_time():
            # Must be a callable: passing `datetime.now(...)` directly would be
            # evaluated only once, when the table is defined, and every row
            # would then share that same creation timestamp.
            return datetime.now(get_localzone())

        source_attrs: dict[str, Any] = {
            "__tablename__": f"index__{self.id}__source",
            "id": Column(
                String,
                primary_key=True,
                default=generate_uuid,
                unique=True,
            ),
            # private index: a name only has to be unique per user (enforced by
            # the table-level constraint added below); otherwise the name is
            # unique across the whole table
            "name": Column(String) if is_private else Column(String, unique=True),
            "path": Column(String),
            "size": Column(Integer, default=0),
            "date_created": Column(DateTime(timezone=True), default=current_time),
            "user": Column(String, default=""),
            "note": Column(
                MutableDict.as_mutable(JSON),  # type: ignore
                # callable so that rows never share one mutable default object
                default=lambda: {},
            ),
        }
        if is_private:
            source_attrs["__table_args__"] = (
                UniqueConstraint("name", "user", name="_name_user_uc"),
            )
        Source = type("Source", (Base,), source_attrs)

        Index = type(
            "IndexTable",
            (Base,),
            {
                "__tablename__": f"index__{self.id}__index",
                "id": Column(Integer, primary_key=True, autoincrement=True),
                "source_id": Column(String),
                "target_id": Column(String),
                "relation_type": Column(String),
                "user": Column(String, default=""),
            },
        )
        FileGroup = type(
            "FileGroupTable",
            (Base,),
            {
                "__tablename__": f"index__{self.id}__group",
                "__table_args__": (
                    UniqueConstraint("name", "user", name="_name_user_uc"),
                ),
                "id": Column(
                    String,
                    primary_key=True,
                    default=generate_uuid,
                    unique=True,
                ),
                "date_created": Column(
                    DateTime(timezone=True), default=current_time
                ),
                "name": Column(String),
                "user": Column(String, default=""),
                "data": Column(
                    MutableDict.as_mutable(JSON),  # type: ignore
                    # callable so that the inner list is not shared between rows
                    default=lambda: {"files": []},
                ),
            },
        )

        self._vs: BaseVectorStore = get_vectorstore(f"index_{self.id}")
        self._docstore: BaseDocumentStore = get_docstore(f"index_{self.id}")
        self._fs_path = filestorage_path / f"index_{self.id}"
        self._resources = {
            "Source": Source,
            "Index": Index,
            "FileGroup": FileGroup,
            "VectorStore": self._vs,
            "DocStore": self._docstore,
            "FileStoragePath": self._fs_path,
        }

    def _find_override(self, suffix: str) -> tuple[bool, Any]:
        """Look up the configured value of the `FILE_INDEX_{suffix}` option

        The value is searched in the following order. Stop at the first one found:
            - `FILE_INDEX_{suffix}` in self.config
            - `FILE_INDEX_{id}_{suffix}` in the flowsettings
            - `FILE_INDEX_{suffix}` in the flowsettings

        Args:
            suffix: the trailing part of the option name, e.g. "PIPELINE"

        Returns:
            `(True, value)` if the option is configured, `(False, None)` otherwise
        """
        shared_key = f"FILE_INDEX_{suffix}"
        if shared_key in self.config:
            return True, self.config[shared_key]

        for key in (f"FILE_INDEX_{self.id}_{suffix}", shared_key):
            if hasattr(flowsettings, key):
                return True, getattr(flowsettings, key)

        return False, None

    def _strip_index_prefix(self, settings: dict) -> dict:
        """Keep only the settings of this index, with the index prefix removed

        Only keys starting with `index.options.{id}.` are kept.
        """
        prefix = f"index.options.{self.id}."
        return {
            key[len(prefix) :]: value
            for key, value in settings.items()
            if key.startswith(prefix)
        }

    def _setup_indexing_cls(self):
        """Retrieve the indexing class for the file index

        There is only one indexing class.

        The indexing class is retrieved from the following order. Stop at the
        first one found:
            - `FILE_INDEX_PIPELINE` in self.config
            - `FILE_INDEX_{id}_PIPELINE` in the flowsettings
            - `FILE_INDEX_PIPELINE` in the flowsettings
            - The default .pipelines.IndexDocumentPipeline
        """
        found, dotted_path = self._find_override("PIPELINE")
        if found:
            self._indexing_pipeline_cls = import_dotted_string(
                dotted_path, safe=False
            )
            return

        from .pipelines import IndexDocumentPipeline

        self._indexing_pipeline_cls = IndexDocumentPipeline

    def _setup_retriever_cls(self):
        """Retrieve the retriever classes for the file index

        There can be multiple retriever classes.

        The retriever classes are retrieved from the following order. Stop at the
        first one found:
            - `FILE_INDEX_RETRIEVER_PIPELINES` in self.config
            - `FILE_INDEX_{id}_RETRIEVER_PIPELINES` in the flowsettings
            - `FILE_INDEX_RETRIEVER_PIPELINES` in the flowsettings
            - The default .pipelines.DocumentRetrievalPipeline
        """
        found, dotted_paths = self._find_override("RETRIEVER_PIPELINES")
        if found:
            self._retriever_pipeline_cls = [
                import_dotted_string(each, safe=False) for each in dotted_paths
            ]
            return

        from .pipelines import DocumentRetrievalPipeline

        self._retriever_pipeline_cls = [DocumentRetrievalPipeline]

    def _setup_file_selector_ui_cls(self):
        """Retrieve the file selector UI class for the file index

        There is only one file selector UI class.

        The class is retrieved from the following order. Stop at the first one
        found:
            - `FILE_INDEX_SELECTOR_UI` in self.config
            - `FILE_INDEX_{id}_SELECTOR_UI` in the flowsettings
            - `FILE_INDEX_SELECTOR_UI` in the flowsettings
            - The default .ui.FileSelector
        """
        found, dotted_path = self._find_override("SELECTOR_UI")
        if found:
            self._selector_ui_cls = import_dotted_string(dotted_path, safe=False)
            return

        from .ui import FileSelector

        self._selector_ui_cls = FileSelector

    def _setup_file_index_ui_cls(self):
        """Retrieve the Index UI class

        There is only one index UI class.

        The class is retrieved from the following order. Stop at the first one
        found:
            - `FILE_INDEX_UI` in self.config
            - `FILE_INDEX_{id}_UI` in the flowsettings
            - `FILE_INDEX_UI` in the flowsettings
            - The default .ui.FileIndexPage
        """
        found, dotted_path = self._find_override("UI")
        if found:
            self._index_ui_cls = import_dotted_string(dotted_path, safe=False)
            return

        from .ui import FileIndexPage

        self._index_ui_cls = FileIndexPage

    def on_create(self):
        """Create the index for the first time

        For the file index, this will:
            1. Postprocess the config
            2. Create the index and the source table if not already exists
            3. Create the vectorstore
            4. Create the docstore
        """
        # default user's value
        config = {}
        for key, value in self.get_admin_settings().items():
            config[key] = value["value"]

        # user's modification
        config.update(self.config)

        self.config = config

        # create the resources
        self._setup_resources()
        self._resources["Source"].metadata.create_all(engine)  # type: ignore
        self._resources["Index"].metadata.create_all(engine)  # type: ignore
        self._resources["FileGroup"].metadata.create_all(engine)  # type: ignore
        self._fs_path.mkdir(parents=True, exist_ok=True)

    def on_delete(self):
        """Clean up the index when the user delete it

        Drops the three SQL tables, the vector store and the document store,
        then removes the file storage folder of this index.
        """
        import shutil

        self._setup_resources()
        self._resources["Source"].__table__.drop(engine)  # type: ignore
        self._resources["Index"].__table__.drop(engine)  # type: ignore
        self._resources["FileGroup"].__table__.drop(engine)  # type: ignore
        self._vs.drop()
        self._docstore.drop()
        shutil.rmtree(self._fs_path)

    def on_start(self):
        """Setup the classes and hooks"""
        self._setup_resources()
        self._setup_indexing_cls()
        self._setup_retriever_cls()
        self._setup_file_index_ui_cls()
        self._setup_file_selector_ui_cls()

    def get_selector_component_ui(self):
        """Return the file selector UI, creating it on the first call"""
        if self._selector_ui is None:
            self._selector_ui = self._selector_ui_cls(self._app, self)
        return self._selector_ui

    def get_index_page_ui(self):
        """Return the index page UI, creating it on the first call"""
        if self._index_ui is None:
            self._index_ui = self._index_ui_cls(self._app, self)
        return self._index_ui

    def get_user_settings(self):
        """Collect the user settings of the indexing and retriever pipelines

        The merged settings are cached on the instance once they are non-empty.
        """
        if self._default_settings:
            return self._default_settings

        settings = {}
        settings.update(self._indexing_pipeline_cls.get_user_settings())
        for cls in self._retriever_pipeline_cls:
            settings.update(cls.get_user_settings())

        self._default_settings = settings
        return settings

    @classmethod
    def get_admin_settings(cls):
        """Describe the admin-level settings of the file index

        Returns:
            a dict mapping each setting name to its UI description
            (display name, default value, component type, extra info)
        """
        from ktem.embeddings.manager import embedding_models_manager

        embedding_default = "default"
        embedding_choices = list(embedding_models_manager.options().keys())

        return {
            "embedding": {
                "name": "Embedding model",
                "value": embedding_default,
                "component": "dropdown",
                "choices": embedding_choices,
                "info": "The name of embedding model to use.",
            },
            "supported_file_types": {
                "name": "Supported file types",
                "value": ".pdf, .txt",
                "component": "text",
                "info": "The file types that can be indexed, separated by comma.",
            },
            "max_file_size": {
                "name": "Max file size (MB)",
                "value": 1000,
                "component": "number",
                "info": "The maximum size of file. Set 0 to disable.",
            },
            "max_number_of_files": {
                "name": "Max number of files that can be indexed",
                "value": 0,
                "component": "number",
                "info": (
                    "The total number of files that can be indexed on the system. "
                    "Set 0 to disable."
                ),
            },
            "private": {
                "name": "Make private",
                "value": False,
                "component": "radio",
                "choices": [("Yes", True), ("No", False)],
                "info": "If private, files will not be accessible across users.",
            },
            "chunk_size": {
                "name": "Size of chunk (number of tokens)",
                "value": 0,
                "component": "number",
                "info": (
                    "Number of tokens of each text segment. "
                    "Set 0 to use developer setting."
                ),
            },
            "chunk_overlap": {
                "name": "Number of overlapping tokens between chunks",
                "value": 0,
                "component": "number",
                "info": (
                    "Number of tokens that consecutive text segments "
                    "should overlap with each other. "
                    "Set 0 to use developer setting."
                ),
            },
        }

    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
        """Define the interface of the indexing pipeline

        Args:
            settings: the app settings; only the keys prefixed with
                `index.options.{id}.` are forwarded to the pipeline
            user_id: the id of the user, stored on the pipeline

        Returns:
            the indexing pipeline, wired to the resources of this index
        """
        stripped_settings = self._strip_index_prefix(settings)

        obj = self._indexing_pipeline_cls.get_pipeline(stripped_settings, self.config)
        obj.Source = self._resources["Source"]
        obj.Index = self._resources["Index"]
        obj.VS = self._vs
        obj.DS = self._docstore
        obj.FSPath = self._fs_path
        obj.user_id = user_id
        obj.private = self.config.get("private", False)
        obj.chunk_size = self.config.get("chunk_size", 0)
        obj.chunk_overlap = self.config.get("chunk_overlap", 0)

        return obj

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Build the retriever pipelines of this index

        Args:
            settings: the app settings; only the keys prefixed with
                `index.options.{id}.` are forwarded to the pipelines
            user_id: the id of the user, stored on each pipeline
            selected: the raw selection, translated to ids by the selector UI

        Returns:
            the retriever pipelines, wired to the resources of this index.
            Retriever classes whose `get_pipeline` returns None are skipped.
        """
        # retrieval settings
        stripped_settings = self._strip_index_prefix(settings)

        # transform selected id
        # NOTE(review): assumes `get_selector_component_ui()` was called before,
        # otherwise `self._selector_ui` is still None here -- verify with callers
        selected_ids: Optional[list[str]] = self._selector_ui.get_selected_ids(selected)

        retrievers = []
        for cls in self._retriever_pipeline_cls:
            obj = cls.get_pipeline(stripped_settings, self.config, selected_ids)
            if obj is None:
                continue
            obj.Source = self._resources["Source"]
            obj.Index = self._resources["Index"]
            obj.VS = self._vs
            obj.DS = self._docstore
            obj.FSPath = self._fs_path
            obj.user_id = user_id
            retrievers.append(obj)

        return retrievers


================================================
FILE: libs/ktem/ktem/index/file/knet/__init__.py
================================================
from .knet_index import KnowledgeNetworkFileIndex

__all__ = ["KnowledgeNetworkFileIndex"]


================================================
FILE: libs/ktem/ktem/index/file/knet/knet_index.py
================================================
from typing import Any

from ktem.index.file import FileIndex

from ..base import BaseFileIndexIndexing, BaseFileIndexRetriever
from .pipelines import KnetIndexingPipeline, KnetRetrievalPipeline


class KnowledgeNetworkFileIndex(FileIndex):
    """File index that uses the Knet pipelines and no vector store"""

    @classmethod
    def get_admin_settings(cls):
        """Return the parent admin settings minus the "embedding" entry"""
        settings = super().get_admin_settings()

        # this kind of index does not need an embedding model
        del settings["embedding"]
        return settings

    def _setup_indexing_cls(self):
        """Always index with the Knet indexing pipeline"""
        self._indexing_pipeline_cls = KnetIndexingPipeline

    def _setup_retriever_cls(self):
        """Always retrieve with the Knet retrieval pipeline"""
        self._retriever_pipeline_cls = [KnetRetrievalPipeline]

    def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
        """Build the parent indexing pipeline, then adapt it for this index

        The vector store is removed and the collection name used for the API
        call is set from the index id.
        """
        pipeline = super().get_indexing_pipeline(settings, user_id)

        pipeline.VS = None
        pipeline.collection_name = f"kh_index_{self.id}"

        return pipeline

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Build the parent retriever pipelines, then adapt each one

        For every retriever the vector store is removed and the collection name
        used for the API call is set from the index id.
        """
        pipelines = super().get_retriever_pipelines(settings, user_id, selected)

        for pipeline in pipelines:
            pipeline.VS = None
            pipeline.collection_name = f"kh_index_{self.id}"

        return pipelines


================================================
FILE: libs/ktem/ktem/index/file/knet/pipelines.py
================================================
import base64
import json
import os
from pathlib import Path
from typing import Optional, Sequence

import requests
import yaml

from kotaemon.base import RetrievedDocument
from kotaemon.indices.rankings import BaseReranking, LLMReranking, LLMTrulensScoring

from ..pipelines import BaseFileIndexRetriever, IndexDocumentPipeline, IndexPipeline


class KnetIndexingPipeline(IndexDocumentPipeline):
    """Indexing pipeline dedicated to the Knowledge Network index"""

    # name of the collection used by the external indexing call
    collection_name: str = "default"

    @classmethod
    def get_user_settings(cls):
        """Expose a single "reader_mode" dropdown with the KN parser as only choice"""
        reader_mode = {
            "name": "Index parser",
            "value": "knowledge_network",
            "choices": [
                ("Default (KN)", "knowledge_network"),
            ],
            "component": "dropdown",
        }
        return {"reader_mode": reader_mode}

    def route(self, file_path: str | Path) -> IndexPipeline:
        """Route the file like the parent does, without splitter (no chunking)"""
        routed = super().route(file_path)

        # no chunking for this pipeline
        routed.splitter = None
        # hand the collection name over to the routed IndexPipeline
        routed.collection_name = self.collection_name

        return routed


class KnetRetrievalPipeline(BaseFileIndexRetriever):
    """Retrieve document chunks from a Knowledge Network HTTP endpoint

    The chunks returned by the endpoint are converted to `RetrievedDocument`
    and then passed through the configured rerankers.
    """

    DEFAULT_KNET_ENDPOINT: str = "http://127.0.0.1:8081/retrieve"

    collection_name: str = "default"
    rerankers: Sequence[BaseReranking] = [LLMReranking.withx()]

    def encode_image_base64(self, image_path: str | Path) -> bytes | str:
        """Convert image to base64

        Args:
            image_path: path of the image file to read

        Returns:
            the file content as a `data:image/png;base64,...` string
        """
        img_base64 = "data:image/png;base64,{}"
        with open(image_path, "rb") as image_file:
            return img_base64.format(
                base64.b64encode(image_file.read()).decode("utf-8")
            )

    def run(
        self,
        text: str,
        doc_ids: Optional[list[str]] = None,
        *args,
        **kwargs,
    ) -> list[RetrievedDocument]:
        """Retrieve document excerpts similar to the text

        Args:
            text: the text to retrieve similar documents
            doc_ids: list of document ids to constraint the retrieval

        Returns:
            the reranked documents; an empty list when no doc_ids are given

        Raises:
            IOError: if the endpoint does not answer with status code 200
        """
        print("searching in doc_ids", doc_ids)
        if not doc_ids:
            return []

        params = {
            "query": text,
            "collection": self.collection_name,
            # the filter is sent as a JSON-encoded query parameter
            "meta_filters": json.dumps({"doc_name": doc_ids}),
        }
        # NOTE(review): no timeout is set on this request, so an unresponsive
        # endpoint would block the call -- consider adding one
        response = requests.get(self.DEFAULT_KNET_ENDPOINT, params=params)
        if response.status_code != 200:
            raise IOError(f"{response.status_code}: {response.text}")

        metadata_translation = {
            "TABLE": "table",
            "FIGURE": "image",
        }

        # Load YAML content from the response content. `safe_load` gives None
        # for an empty body, which is treated as "no chunk found".
        chunks = yaml.safe_load(response.content) or []

        docs: list[RetrievedDocument] = []
        for chunk in chunks:
            metadata = chunk["node"]["metadata"]
            metadata["type"] = metadata_translation.get(
                metadata.pop("content_type", ""), ""
            )
            metadata["file_name"] = metadata.pop("company_name", "")

            # load image from returned path
            image_path = metadata.get("image_path", "")
            if image_path and os.path.isfile(image_path):
                base64_im = self.encode_image_base64(image_path)
                # explicitly set document type
                metadata["type"] = "image"
                metadata["image_origin"] = base64_im

            docs.append(
                RetrievedDocument(text=chunk["node"]["text"], metadata=metadata)
            )

        for reranker in self.rerankers:
            docs = reranker(documents=docs, query=text)

        return docs

    @classmethod
    def get_user_settings(cls) -> dict:
        """Describe the user settings: the scoring LLM and the retrieval mode"""
        from ktem.llms.manager import llms

        try:
            reranking_llm = llms.get_default_name()
            reranking_llm_choices = list(llms.options().keys())
        except Exception:
            # fall back to an empty dropdown when the LLM manager cannot
            # provide a default name or its options
            reranking_llm = None
            reranking_llm_choices = []

        return {
            "reranking_llm": {
                "name": "LLM for scoring",
                "value": reranking_llm,
                "component": "dropdown",
                "choices": reranking_llm_choices,
                "special_type": "llm",
            },
            "retrieval_mode": {
                "name": "Retrieval mode",
                "value": "hybrid",
                "choices": ["vector", "text", "hybrid"],
                "component": "dropdown",
            },
        }

    @classmethod
    def get_pipeline(cls, user_settings, index_settings, selected):
        """Get retriever objects associated with the index

        Args:
            user_settings: the user settings; "reranking_llm" is read from it
            index_settings: the settings of the index (not used here)
            selected: the document ids forwarded as `doc_ids` to `run()`

        Returns:
            the retriever, with `doc_ids` preset and the reranker LLM assigned
        """
        from ktem.llms.manager import llms

        retriever = cls(
            rerankers=[LLMTrulensScoring()],
        )

        # hacky way to input doc_ids to retriever.run() call (through theflow)
        kwargs = {".doc_ids": selected}
        retriever.set_run(kwargs, temp=False)

        for reranker in retriever.rerankers:
            if isinstance(reranker, LLMReranking):
                reranker.llm = llms.get(
                    user_settings["reranking_llm"], llms.get_default()
                )

        return retriever


================================================
FILE: libs/ktem/ktem/index/file/pipelines.py
================================================
from __future__ import annotations

import json
import logging
import shutil
import threading
import time
import warnings
from collections import defaultdict
from copy import deepcopy
from functools import lru_cache
from hashlib import sha256
from pathlib import Path
from typing import Generator, Optional, Sequence

import tiktoken
from decouple import config
from ktem.db.models import engine
from ktem.embeddings.manager import embedding_models_manager
from ktem.llms.manager import llms
from ktem.rerankings.manager import reranking_models_manager
from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.file.base import default_file_metadata_func
from llama_index.core.vector_stores import (
    FilterCondition,
    FilterOperator,
    MetadataFilter,
    MetadataFilters,
)
from llama_index.core.vector_stores.types import VectorStoreQueryMode
from sqlalchemy import delete, select
from sqlalchemy.orm import Session
from theflow.settings import settings
from theflow.utils.modules import import_dotted_string

from kotaemon.base import BaseComponent, Document, Node, Param, RetrievedDocument
from kotaemon.embeddings import BaseEmbeddings
from kotaemon.indices import VectorIndexing, VectorRetrieval
from kotaemon.indices.ingests.files import (
    KH_DEFAULT_FILE_EXTRACTORS,
    adobe_reader,
    azure_reader,
    docling_reader,
    unstructured,
    web_reader,
)
from kotaemon.indices.rankings import BaseReranking, LLMReranking, LLMTrulensScoring
from kotaemon.indices.splitters import BaseSplitter, TokenSplitter

from .base import BaseFileIndexIndexing, BaseFileIndexRetriever

logger = logging.getLogger(__name__)


@lru_cache
def dev_settings():
    """Read the developer-level indexing settings from flowsettings.py

    Returns:
        a tuple `(file_extractors, chunk_size, chunk_overlap)` where
        `file_extractors` maps each configured key to an instance of the
        imported extractor class (empty dict when not configured), and
        `chunk_size` / `chunk_overlap` are None when not configured
    """

    def optional_setting(name):
        # value of the setting, or None when flowsettings does not define it
        return getattr(settings, name) if hasattr(settings, name) else None

    file_extractors = {}
    if hasattr(settings, "FILE_INDEX_PIPELINE_FILE_EXTRACTORS"):
        configured = settings.FILE_INDEX_PIPELINE_FILE_EXTRACTORS
        for key, dotted_path in configured.items():
            # import the extractor class and instantiate it right away
            extractor_cls = import_dotted_string(dotted_path, safe=False)
            file_extractors[key] = extractor_cls()

    chunk_size = optional_setting("FILE_INDEX_PIPELINE_SPLITTER_CHUNK_SIZE")
    chunk_overlap = optional_setting("FILE_INDEX_PIPELINE_SPLITTER_CHUNK_OVERLAP")

    return file_extractors, chunk_size, chunk_overlap


# Default tokenizer callable: the `encode` method of the encoding that tiktoken
# associates with the "gpt-3.5-turbo" model.
_default_token_func = tiktoken.encoding_for_model("gpt-3.5-turbo").encode


class DocumentRetrievalPipeline(BaseFileIndexRetriever):
    """Retrieve relevant document

    Args:
        vector_retrieval: the retrieval pipeline that return the relevant documents
            given a text query
        reranker: the reranking pipeline that re-rank and filter the retrieved
            documents
        get_extra_table: if True, for each retrieved document, the pipeline will look
            for surrounding tables (e.g. within the page)
        top_k: number of documents to retrieve
        mmr: whether to use mmr to re-rank the documents
    """

    embedding: BaseEmbeddings
    rerankers: Sequence[BaseReranking] = []
    # use LLM to create relevant scores for displaying on UI
    llm_scorer: LLMReranking | None = LLMReranking.withx()
    get_extra_table: bool = False
    mmr: bool = False
    top_k: int = 5
    retrieval_mode: str = "hybrid"

    @Node.auto(depends_on=["embedding", "VS", "DS"])
    def vector_retrieval(self) -> VectorRetrieval:
        """Build the retrieval node from this pipeline's embedding and stores"""
        retrieval = VectorRetrieval(
            embedding=self.embedding,
            vector_store=self.VS,
            doc_store=self.DS,
            retrieval_mode=self.retrieval_mode,  # type: ignore
            rerankers=self.rerankers,
        )
        return retrieval

    def run(
        self,
        text: str,
        doc_ids: Optional[list[str]] = None,
        *args,
        **kwargs,
    ) -> list[RetrievedDocument]:
        """Retrieve document excerpts similar to the text

        Args:
            text: the text to retrieve similar documents
            doc_ids: list of document ids to constrain the retrieval. An item
                starting with "[" is parsed as a JSON list of ids (a group of
                documents) and flattened into the final list of ids.

        Returns:
            the retrieved documents, or an empty list when no document is
            selected. When `get_extra_table` is set, documents sharing a file
            name and page label with a retrieved document are appended.

        Raises:
            ValueError: if an item of `doc_ids` is None
        """
        # flatten doc_ids in case of group of doc_ids are passed
        if doc_ids:
            flatten_doc_ids = []
            for doc_id in doc_ids:
                if doc_id is None:
                    raise ValueError("No document is selected")

                if doc_id.startswith("["):
                    flatten_doc_ids.extend(json.loads(doc_id))
                else:
                    flatten_doc_ids.append(doc_id)
            doc_ids = flatten_doc_ids

        print("searching in doc_ids", doc_ids)
        if not doc_ids:
            logger.info(f"Skip retrieval because of no selected files: {self}")
            return []

        # collect the chunks that belong to the selected documents
        retrieval_kwargs: dict = {}
        with Session(engine) as session:
            stmt = select(self.Index).where(
                self.Index.relation_type == "document",
                self.Index.source_id.in_(doc_ids),
            )
            results = session.execute(stmt)
            chunk_ids = [r[0].target_id for r in results.all()]

        # do first round top_k extension
        retrieval_kwargs["do_extend"] = True
        retrieval_kwargs["scope"] = chunk_ids
        retrieval_kwargs["filters"] = MetadataFilters(
            filters=[
                MetadataFilter(
                    key="file_id",
                    value=doc_ids,
                    operator=FilterOperator.IN,
                )
            ],
            condition=FilterCondition.OR,
        )

        if self.mmr:
            # TODO: double check that llama-index MMR works correctly
            retrieval_kwargs["mode"] = VectorStoreQueryMode.MMR
            retrieval_kwargs["mmr_threshold"] = 0.5

        # retrieve (reranking, if any, happens inside the retrieval node)
        s_time = time.time()
        print(f"retrieval_kwargs: {retrieval_kwargs.keys()}")
        docs = self.vector_retrieval(text=text, top_k=self.top_k, **retrieval_kwargs)
        print("retrieval step took", time.time() - s_time)

        if not self.get_extra_table:
            return docs

        # retrieve extra nodes relate to table
        table_pages = defaultdict(list)
        retrieved_id = {doc.doc_id for doc in docs}
        for doc in docs:
            if "page_label" not in doc.metadata:
                continue
            if "file_name" not in doc.metadata:
                warnings.warn(
                    "file_name not in metadata while page_label is in metadata: "
                    f"{doc.metadata}"
                )
                # the page cannot be grouped without its file name; skip it
                # instead of failing with KeyError on the lookup below
                continue
            table_pages[doc.metadata["file_name"]].append(doc.metadata["page_label"])

        # one (file_name, page_labels) condition per file
        queries: list[dict] = [
            {"$and": [{"file_name": {"$eq": fn}}, {"page_label": {"$in": pls}}]}
            for fn, pls in table_pages.items()
        ]
        if queries:
            try:
                extra_docs = self.vector_retrieval(
                    text="",
                    top_k=50,
                    where=queries[0] if len(queries) == 1 else {"$or": queries},
                )
                for doc in extra_docs:
                    if doc.doc_id not in retrieved_id:
                        docs.append(doc)
            except Exception:
                # best effort: keep the main results, but record the traceback
                logger.exception("Error retrieving additional tables")

        return docs

    def generate_relevant_scores(
        self, query: str, documents: list[RetrievedDocument]
    ) -> list[RetrievedDocument]:
        docs = (
            documents
            if not self.llm_scorer
            else self.llm_scorer(documents=documents, query=query)
        )
        return docs

    @classmethod
    def get_user_settings(cls) -> dict:
        """Describe the retrieval settings exposed to the user

        Returns:
            a dict mapping each setting key to its UI description (display
            name, default value, component type and, where relevant, choices).
            When the LLM manager cannot be queried, the error is logged and the
            "reranking_llm" entry has no default value and no choice.
        """
        from ktem.llms.manager import llms

        def checkbox(label: str, default: bool) -> dict:
            # all boolean settings share the same checkbox description
            return {
                "name": label,
                "value": default,
                "choices": [True, False],
                "component": "checkbox",
            }

        try:
            default_llm = llms.get_default_name()
            llm_choices = list(llms.options().keys())
        except Exception as e:
            logger.error(e)
            default_llm, llm_choices = None, []

        # LLM relevant scoring is on by default unless low LLM usage is requested
        llm_scoring_default = not config(
            "USE_LOW_LLM_REQUESTS", default=False, cast=bool
        )

        return {
            "reranking_llm": {
                "name": "LLM for relevant scoring",
                "value": default_llm,
                "component": "dropdown",
                "choices": llm_choices,
                "special_type": "llm",
            },
            "num_retrieval": {
                "name": "Number of document chunks to retrieve",
                "value": 10,
                "component": "number",
            },
            "retrieval_mode": {
                "name": "Retrieval mode",
                "value": "hybrid",
                "choices": ["vector", "text", "hybrid"],
                "component": "dropdown",
            },
            "prioritize_table": checkbox("Prioritize table", False),
            "mmr": checkbox("Use MMR", False),
            "use_reranking": checkbox("Use reranking", True),
            "use_llm_reranking": checkbox(
                "Use LLM relevant scoring", llm_scoring_default
            ),
        }

    @classmethod
    def get_pipeline(cls, user_settings, index_settings, selected):
        """Get retriever objects associated with the index

        Args:
            user_settings: the retrieval settings chosen by the user (the keys
                are the ones declared in `get_user_settings`)
            index_settings: the settings of the index; the optional "embedding"
                and "reranking" entries name the models to use, the manager
                defaults being used otherwise
            selected: the selected document ids, bound to the `doc_ids`
                argument of `run`

        Returns:
            the configured retriever
        """
        use_llm_reranking = user_settings.get("use_llm_reranking", False)

        # Only resolve the reranking model when reranking is enabled, so that a
        # reranker name that cannot be resolved does not fail a pipeline that
        # would discard the reranker anyway.
        rerankers: list = []
        if user_settings["use_reranking"]:
            reranking_name = index_settings.get(
                "reranking", reranking_models_manager.get_default_name()
            )
            rerankers = [reranking_models_manager[reranking_name]]

        retriever = cls(
            get_extra_table=user_settings["prioritize_table"],
            top_k=user_settings["num_retrieval"],
            mmr=user_settings["mmr"],
            embedding=embedding_models_manager[
                index_settings.get(
                    "embedding", embedding_models_manager.get_default_name()
                )
            ],
            retrieval_mode=user_settings["retrieval_mode"],
            llm_scorer=(LLMTrulensScoring() if use_llm_reranking else None),
            rerankers=rerankers,
        )

        # LLM-based rerankers use the LLM selected by the user
        for reranker in retriever.rerankers:
            if isinstance(reranker, LLMReranking):
                reranker.llm = llms.get(
                    user_settings["reranking_llm"], llms.get_default()
                )

        # same for the scorer that produces the relevance scores
        if retriever.llm_scorer:
            retriever.llm_scorer.llm = llms.get(
                user_settings["reranking_llm"], llms.get_default()
            )

        # bind the selected documents to the `doc_ids` argument of `run`
        kwargs = {".doc_ids": selected}
        retriever.set_run(kwargs, temp=False)
        return retriever


class IndexPipeline(BaseComponent):
    """Index a single file"""

    loader: BaseReader
    splitter: BaseSplitter | None
    chunk_batch_size: int = 200

    Source = Param(help="The SQLAlchemy Source table")
    Index = Param(help="The SQLAlchemy Index table")
    VS = Param(help="The VectorStore")
    DS = Param(help="The DocStore")
    FSPath = Param(help="The file storage path")
    user_id = Param(help="The user id")
    collection_name: str = "default"
    private: bool = False
    run_embedding_in_thread: bool = False
    embedding: BaseEmbeddings

    @Node.auto(depends_on=["Source", "Index", "embedding"])
    def vector_indexing(self) -> VectorIndexing:
        """Build the indexing node from this pipeline's stores and embedding"""
        # NOTE(review): the node is built from VS/DS/embedding while the declared
        # dependencies are Source/Index/embedding — confirm this is intended.
        indexing = VectorIndexing(
            vector_store=self.VS, doc_store=self.DS, embedding=self.embedding
        )
        return indexing

    def handle_docs(self, docs, file_id, file_name) -> Generator[Document, None, int]:
        """Split the loaded documents and write them to the doc and vector stores

        Documents are grouped by their metadata "type" ("text" when absent):
        text documents go through the splitter (if any), thumbnails are linked
        to the text chunks of the same page label, and everything is indexed.

        Args:
            docs: the documents produced by the loader
            file_id: the id of the file the documents come from
            file_name: the name used in the progress messages

        Yields:
            progress messages on the "debug" channel

        Returns:
            the number of chunks written to the doc store
        """
        started_at = time.time()

        text_docs, thumbnail_docs, non_text_docs = [], [], []
        for doc in docs:
            kind = doc.metadata.get("type", "text")
            if kind == "text":
                bucket = text_docs
            elif kind == "thumbnail":
                bucket = thumbnail_docs
            else:
                bucket = non_text_docs
            bucket.append(doc)

        print(f"Got {len(thumbnail_docs)} page thumbnails")
        page_label_to_thumbnail = {
            thumb.metadata["page_label"]: thumb.doc_id for thumb in thumbnail_docs
        }

        all_chunks = self.splitter(text_docs) if self.splitter else text_docs

        # let each chunk point to the thumbnail of its page, when there is one
        for chunk in all_chunks:
            page_label = chunk.metadata.get("page_label", None)
            if page_label and page_label in page_label_to_thumbnail:
                chunk.metadata["thumbnail_doc_id"] = page_label_to_thumbnail[page_label]

        to_index_chunks = all_chunks + non_text_docs + thumbnail_docs
        total = len(to_index_chunks)

        # doc store first, with batches 4 times larger than the embedding batches
        n_chunks = 0
        docstore_batch = self.chunk_batch_size * 4
        for offset in range(0, total, docstore_batch):
            batch = to_index_chunks[offset : offset + docstore_batch]
            self.handle_chunks_docstore(batch, file_id)
            n_chunks += len(batch)
            yield Document(
                f" => [{file_name}] Processed {n_chunks} chunks",
                channel="debug",
            )

        def insert_chunks_to_vectorstore():
            embedded = 0
            vector_batch = self.chunk_batch_size
            for offset in range(0, total, vector_batch):
                batch = to_index_chunks[offset : offset + vector_batch]
                self.handle_chunks_vectorstore(batch, file_id)
                embedded += len(batch)
                if self.VS:
                    yield Document(
                        f" => [{file_name}] Created embedding for {embedded} chunks",
                        channel="debug",
                    )

        if self.run_embedding_in_thread:
            # background mode: exhaust the generator in a thread and drop its
            # progress messages
            print("Running embedding in thread")

            def drain():
                for _ in insert_chunks_to_vectorstore():
                    pass

            threading.Thread(target=drain).start()
        else:
            yield from insert_chunks_to_vectorstore()

        print("indexing step took", time.time() - started_at)
        return n_chunks

    def handle_chunks_docstore(self, chunks, file_id):
        """Add the chunks to the doc store and link them to the file

        Args:
            chunks: the chunks to store
            file_id: the id of the file the chunks belong to
        """
        self.vector_indexing.add_to_docstore(chunks)

        # one "document" relation per chunk in the index table
        with Session(engine) as session:
            session.add_all(
                [
                    self.Index(
                        source_id=file_id,
                        target_id=chunk.doc_id,
                        relation_type="document",
                    )
                    for chunk in chunks
                ]
            )
            session.commit()

    def handle_chunks_vectorstore(self, chunks, file_id):
        """Add the chunks to the vector store and link them to the file

        Args:
            chunks: the chunks to embed and store
            file_id: the id of the file the chunks belong to
        """
        self.vector_indexing.add_to_vectorstore(chunks)
        self.vector_indexing.write_chunk_to_file(chunks)

        # the "vector" relations are only recorded when a vector store is set
        if not self.VS:
            return

        with Session(engine) as session:
            session.add_all(
                [
                    self.Index(
                        source_id=file_id,
                        target_id=chunk.doc_id,
                        relation_type="vector",
                    )
                    for chunk in chunks
                ]
            )
            session.commit()

    def get_id_if_exists(self, file_path: str | Path) -> Optional[str]:
        """Look up the id of an already indexed file

        The lookup is done by name: the file name for a `Path`, the string
        itself otherwise. In private mode, only the sources of the current
        user are considered.

        Args:
            file_path: the path to the file

        Returns:
            the file id if the file is indexed, otherwise None
        """
        name = file_path
        if isinstance(file_path, Path):
            name = file_path.name

        conditions: list = [self.Source.name == name]
        if self.private:
            conditions.append(self.Source.user == self.user_id)

        with Session(engine) as session:
            row = session.execute(select(self.Source).where(*conditions)).first()
            if row:
                return row[0].id

        return None

    def store_url(self, url: str) -> str:
        """Record a URL as a source in the database

        The source is named after the URL, its path is the SHA-256 hex digest
        of the URL and its size is 0.

        Args:
            url: the URL

        Returns:
            the file id
        """
        record = self.Source(
            name=url,
            path=sha256(url.encode()).hexdigest(),
            size=0,
            user=self.user_id,  # type: ignore
        )
        with Session(engine) as session:
            session.add(record)
            session.commit()
            return record.id

    def store_file(self, file_path: Path) -> str:
        """Store file into the database and storage, return the file id

        The file is copied to the file storage under the SHA-256 hex digest of
        its content, then recorded as a source.

        Args:
            file_path: the path to the file

        Returns:
            the file id
        """
        # hash incrementally so that a large file is never fully held in memory
        digest = sha256()
        with file_path.open("rb") as fi:
            for block in iter(lambda: fi.read(1024 * 1024), b""):
                digest.update(block)
        file_hash = digest.hexdigest()

        shutil.copy(file_path, self.FSPath / file_hash)
        source = self.Source(
            name=file_path.name,
            path=file_hash,
            size=file_path.stat().st_size,
            user=self.user_id,  # type: ignore
        )
        with Session(engine) as session:
            session.add(source)
            session.commit()
            file_id = source.id

        return file_id

    def finish(self, file_id: str, file_path: str | Path) -> str:
        """Record the token count and the loader name in the note of the source

        Args:
            file_id: the id of the indexed file
            file_path: the path of the indexed file (not used here)

        Returns:
            the file id, also when no source with this id exists
        """
        with Session(engine) as session:
            source_stmt = select(self.Source).where(self.Source.id == file_id)
            row = session.execute(source_stmt).first()
            if not row:
                return file_id

            source = row[0]

            # count the tokens over all the document chunks of the file
            chunk_ids_stmt = select(self.Index.target_id).where(
                self.Index.source_id == file_id,
                self.Index.relation_type == "document",
            )
            doc_ids = [record[0] for record in session.execute(chunk_ids_stmt)]
            token_func = self.get_token_func()
            if doc_ids and token_func:
                n_tokens = 0
                for doc in self.DS.get(doc_ids):
                    n_tokens += len(token_func(doc.text))
                source.note["tokens"] = n_tokens

            # remember which loader produced the documents
            source.note["loader"] = self.get_from_path("loader").__class__.__name__

            session.add(source)
            session.commit()

        return file_id

    def get_token_func(self):
        """Get the token function for calculating the number of tokens

        Returns:
            the module-level `_default_token_func`; `finish` takes the length of
            its output as the number of tokens of a text
        """
        return _default_token_func

    def delete_file(self, file_id: str):
        """Delete a file from the db, including its chunks in docstore and vectorstore

        The source and its index relations are removed and committed first;
        the related entries are then deleted from the vector store (when one
        is set) and from the doc store.

        Args:
            file_id: the file id
        """
        ids_by_relation: dict = {"vector": [], "document": []}
        with Session(engine) as session:
            session.execute(delete(self.Source).where(self.Source.id == file_id))
            rows = session.execute(
                select(self.Index).where(self.Index.source_id == file_id)
            ).all()
            for (relation,) in rows:
                # other relation types are removed from the index as well, but
                # have no store entry to clean up
                target_ids = ids_by_relation.get(relation.relation_type)
                if target_ids is not None:
                    target_ids.append(relation.target_id)
                session.delete(relation)
            session.commit()

        vs_ids = ids_by_relation["vector"]
        ds_ids = ids_by_relation["document"]
        if vs_ids and self.VS:
            self.VS.delete(vs_ids)
        if ds_ids:
            self.DS.delete(ds_ids)

    def run(
        self, file_path: str | Path, reindex: bool, **kwargs
    ) -> tuple[str, list[Document]]:
        """Not implemented: always raises NotImplementedError

        NOTE(review): `stream` takes the same arguments and performs the
        indexing — presumably callers are expected to use it; confirm.
        """
        raise NotImplementedError

    def stream(
        self, file_path: str | Path, reindex: bool, **kwargs
    ) -> Generator[Document, None, tuple[str, list[Document]]]:
        """Index a single file or URL, reporting the progress along the way

        Args:
            file_path: a `Path` for a local file, a string for a URL
            reindex: for a local file that is already indexed, remove the old
                records and index the file again

        Yields:
            progress messages on the "debug" channel

        Returns:
            the file id and the documents produced by the loader

        Raises:
            ValueError: if a local file is already indexed and `reindex` is
                False, or if a URL is already indexed
        """
        if isinstance(file_path, Path):
            file_path = file_path.resolve()

        existing_id = self.get_id_if_exists(file_path)

        if isinstance(file_path, Path):
            if existing_id is not None:
                if not reindex:
                    raise ValueError(
                        f"File {file_path.name} already indexed. Please rerun with "
                        "reindex=True to force reindexing."
                    )
                # drop the previous records before storing the file again
                yield Document(f" => Removing old {file_path.name}", channel="debug")
                self.delete_file(existing_id)

            file_id = self.store_file(file_path)
            extra_info = default_file_metadata_func(str(file_path))
            file_name = file_path.name
        else:
            if existing_id is not None:
                raise ValueError(f"URL {file_path} already indexed.")

            file_id = self.store_url(file_path)
            extra_info = {"file_name": file_path}
            file_name = file_path

        # metadata attached to every document extracted from this source
        extra_info["file_id"] = file_id
        extra_info["collection_name"] = self.collection_name

        yield Document(f" => Converting {file_name} to text", channel="debug")
        docs = self.loader.load_data(file_path, extra_info=extra_info)
        yield Document(f" => Converted {file_name} to text", channel="debug")
        yield from self.handle_docs(docs, file_id, file_name)

        self.finish(file_id, file_path)

        yield Document(f" => Finished indexing {file_name}", channel="debug")
        return file_id, docs


class IndexDocumentPipeline(BaseFileIndexIndexing):
    """Index the file. Decide which pipeline based on the file type.

    This method is essentially a factory to decide which indexing pipeline to use.

    We can decide the pipeline programmatically, and/or automatically based on an LLM.
    If we based on the LLM, essentially we will log the LLM thought process in a file,
    and then during the indexing, we will read that file to decide which pipeline
    to use, and then log the operation in that file. Overtime, the LLM can learn to
    decide which pipeline should be used.
    """

    reader_mode: str = Param("default", help="The reader mode")
    embedding: BaseEmbeddings
    run_embedding_in_thread: bool = False

    @Param.auto(depends_on="reader_mode")
    def readers(self):
        """Build the mapping of readers used to load the files

        Starts from a copy of the default extractors, overrides the ".pdf"
        reader according to `reader_mode`, then applies the extractors declared
        in the developer settings (which take precedence).
        """
        pdf_reader_by_mode = {
            "adobe": adobe_reader,
            "azure-di": azure_reader,
            "docling": docling_reader,
        }

        readers = deepcopy(KH_DEFAULT_FILE_EXTRACTORS)
        print("reader_mode", self.reader_mode)
        if self.reader_mode in pdf_reader_by_mode:
            readers[".pdf"] = pdf_reader_by_mode[self.reader_mode]

        dev_readers, _, _ = dev_settings()
        readers.update(dev_readers)

        return readers

    @classmethod
    def get_user_settings(cls):
        return {
            "reader_mode": {
                "name": "File loader",
                "value": "default",
                "choices": [
                    ("Default (open-source)", "default"),
                    ("Adobe API (figure+table extraction)", "adobe"),
                    (
                        "Azure AI Document Intelligence (figure+table extraction)",
                        "azure-di",
                    ),
                    ("Docling (figure+table extraction)", "docling"),
                ],
                "component": "dropdown",
            },
        }

    @classmethod
    def get_pipeline(cls, user_settings, index_settings) -> BaseFileIndexIndexing:
        """Create the indexing pipeline from the user and index settings

        Args:
            user_settings: the user settings; "quick_index_mode" (default False)
                makes the embedding run in a thread and "reader_mode" (default
                "default") selects the file loader
            index_settings: the index settings; the optional "embedding" entry
                names the embedding model, the manager default being used
                otherwise

        Returns:
            the configured pipeline
        """
        quick_index = user_settings.get("quick_index_mode", False)
        print("use_quick_index_mode", quick_index)

        embedding_name = index_settings.get(
            "embedding", embedding_models_manager.get_default_name()
        )
        return cls(
            embedding=embedding_models_manager[embedding_name],
            run_embedding_in_thread=quick_index,
            reader_mode=user_settings.get("reader_mode", "default"),
        )

    def is_url(self, file_path: str | Path) -> bool:
        return isinstance(file_path, str) and (
            file_path.startswith("http://") or file_path.startswith("https://")
        )

    def route(self, file_path: str | Path) -> IndexPipeline:
        """Decide the pipeline based on the file type

        URLs are loaded with the web reader; local files with the reader
        registered for their lower-cased extension, `unstructured` being the
        fallback. The chunk size and overlap come from this pipeline, then
        from the developer settings, then default to 1024 and 256.

        Can subclass this method for a more elaborate pipeline routing strategy.

        Raises:
            NotImplementedError: if the reader registered for the extension is
                None
        """
        _, dev_chunk_size, dev_chunk_overlap = dev_settings()
        chunk_size = self.chunk_size or dev_chunk_size
        chunk_overlap = self.chunk_overlap or dev_chunk_overlap

        if self.is_url(file_path):
            reader = web_reader
        else:
            assert isinstance(file_path, Path)
            reader = self.readers.get(file_path.suffix.lower(), unstructured)
            if reader is None:
                raise NotImplementedError(
                    f"No supported pipeline to index {file_path.name}. Please specify "
                    "the suitable pipeline for this file type in the settings."
                )

        print(f"Chunk size: {chunk_size}, chunk overlap: {chunk_overlap}")
        print("Using reader", reader)

        splitter = TokenSplitter(
            chunk_size=chunk_size or 1024,
            chunk_overlap=chunk_overlap or 256,
            separator="\n\n",
            backup_separators=["\n", ".", "\u200B"],
        )
        # the per-file pipeline shares the stores and tables of this pipeline
        return IndexPipeline(
            loader=reader,
            splitter=splitter,
            run_embedding_in_thread=self.run_embedding_in_thread,
            Source=self.Source,
            Index=self.Index,
            VS=self.VS,
            DS=self.DS,
            FSPath=self.FSPath,
            user_id=self.user_id,
            private=self.private,
            embedding=self.embedding,
        )

    def run(
        self, file_paths: str | Path | list[str | Path], *args, **kwargs
    ) -> tuple[list[str | None], list[str | None]]:
        """Not implemented: always raises NotImplementedError

        NOTE(review): `stream` accepts the same `file_paths` and performs the
        indexing — presumably callers are expected to use it; confirm.
        """
        raise NotImplementedError

    def stream(
        self, file_paths: str | Path | list[str | Path], reindex: bool = False, **kwargs
    ) -> Generator[
        Document, None, tuple[list[str | None], list[str | None], list[Document]]
    ]:
        """Index the files one by one, reporting the progress along the way

        A failure on one file is logged and reported; the remaining files are
        still processed.

        Args:
            file_paths: a file path or URL, or a list of them
            reindex: forwarded to the per-file pipeline

        Yields:
            progress messages on the "debug" channel, and one status document
            per file on the "index" channel

        Returns:
            a list of indexed file ids (None for a failed file), a list of
            errors (None for an indexed file), and all the loaded documents
        """
        paths = file_paths if isinstance(file_paths, list) else [file_paths]

        file_ids: list[str | None] = []
        errors: list[str | None] = []
        all_docs = []

        total = len(paths)
        for position, file_path in enumerate(paths, start=1):
            if self.is_url(file_path):
                file_name = file_path
            else:
                file_path = Path(file_path)
                file_name = file_path.name

            yield Document(
                content=f"Indexing [{position}/{total}]: {file_name}",
                channel="debug",
            )

            try:
                file_pipeline = self.route(file_path)
                file_id, docs = yield from file_pipeline.stream(
                    file_path, reindex=reindex, **kwargs
                )
                all_docs.extend(docs)
                file_ids.append(file_id)
                errors.append(None)
                success_report = {
                    "file_path": file_path,
                    "file_name": file_name,
                    "status": "success",
                }
                yield Document(content=success_report, channel="index")
            except Exception as e:
                logger.exception(e)
                file_ids.append(None)
                errors.append(str(e))
                failure_report = {
                    "file_path": file_path,
                    "file_name": file_name,
                    "status": "failed",
                    "message": str(e),
                }
                yield Document(content=failure_report, channel="index")

        return file_ids, errors, all_docs


================================================
FILE: libs/ktem/ktem/index/file/ui.py
================================================
import html
import json
import os
import shutil
import tempfile
import zipfile
from copy import deepcopy
from pathlib import Path
from typing import Generator

import gradio as gr
import pandas as pd
from gradio.data_classes import FileData
from gradio.utils import NamedString
from ktem.app import BasePage
from ktem.db.engine import engine
from ktem.utils.render import Render
from sqlalchemy import select
from sqlalchemy.orm import Session
from theflow.settings import settings as flowsettings

from ...utils.commands import WEB_SEARCH_COMMAND
from ...utils.rate_limit import check_rate_limit
from .utils import download_arxiv_pdf, is_arxiv_url

# Flags read from flowsettings; both are False when the setting is not defined.
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
# UI constants. NOTE(review): their consumers are outside this section —
# presumably MAX_FILENAME_LENGTH is a number of characters; confirm.
DOWNLOAD_MESSAGE = "Start download"
MAX_FILENAME_LENGTH = 20
MAX_FILE_COUNT = 200

# JS snippet: gives the focus to the chat input textarea.
chat_input_focus_js = """
function() {
    let chatInput = document.querySelector("#chat-input textarea");
    chatInput.focus();
}
"""

# JS snippet: clicks the submit button of the chat input, then gives the focus
# back to the textarea.
chat_input_focus_js_with_submit = """
function() {
    let chatInput = document.querySelector("#chat-input textarea");
    let chatInputSubmit = document.querySelector("#chat-input button.submit-button");
    chatInputSubmit.click();
    chatInput.focus();
}
"""

# JS snippet: (re)attaches a Tribute autocomplete to the chat input textarea.
# The suggestions are the first item of each entry of `file_list`, plus a web
# search tag. Every "web_search" occurrence of the snippet is replaced by
# WEB_SEARCH_COMMAND when this module is imported.
update_file_list_js = """
function(file_list) {
    var values = [];
    for (var i = 0; i < file_list.length; i++) {
        values.push({
            key: file_list[i][0],
            value: '"' + file_list[i][0] + '"',
        });
    }

    // manually push web search tag
    values.push({
        key: "web_search",
        value: '"web_search"',
    });

    var tribute = new Tribute({
        values: values,
        noMatchTemplate: "",
        allowSpaces: true,
    })
    input_box = document.querySelector('#chat-input textarea');
    tribute.detach(input_box);
    tribute.attach(input_box);
}
""".replace(
    "web_search", WEB_SEARCH_COMMAND
)


class File(gr.File):
    """Subclass from gr.File to maintain the original filename

    The issue happens when user uploads file with name like: !@#$%%^&*().pdf
    """

    def _process_single_file(self, f: FileData) -> NamedString | bytes:
        """Return the uploaded file as a path (original name kept) or as bytes

        Args:
            f: the uploaded file

        Returns:
            for the "filepath" type, the path of the file, renamed to its
            original name when the two differ; for the "binary" type, the
            content of the file

        Raises:
            ValueError: if the type of the component is neither "filepath" nor
                "binary"
        """
        file_name = f.path
        if self.type == "filepath":
            if f.orig_name and Path(file_name).name != f.orig_name:
                # put the original name back, in the same directory
                file_name = str(Path(file_name).parent / f.orig_name)
                os.rename(f.path, file_name)
            # NOTE(review): this temporary file is created in the cache directory
            # but never returned nor closed explicitly — presumably mirrored from
            # the parent implementation; confirm whether it is still needed.
            file = tempfile.NamedTemporaryFile(delete=False, dir=self.GRADIO_CACHE)
            file.name = file_name
            return NamedString(file_name)
        elif self.type == "binary":
            with open(file_name, "rb") as file_data:
                return file_data.read()
        else:
            # report the configured type of the component, not the `type` builtin
            raise ValueError(
                "Unknown type: "
                + str(self.type)
                + ". Please choose from: 'filepath', 'binary'."
            )


class DirectoryUpload(BasePage):
    """UI block to index the files of a directory given by its path"""

    def __init__(self, app, index):
        """Read the supported file types of the index, then build the UI

        Args:
            app: the application, passed to the base page
            index: the index; the "supported_file_types" entry of its config is
                read as a comma-separated string
        """
        super().__init__(app)
        self._index = index

        file_types = self._index.config.get("supported_file_types", "")
        self._supported_file_types_str = file_types
        self._supported_file_types = [
            file_type.strip() for file_type in file_types.split(",")
        ]

        self.on_building_ui()

    def on_building_ui(self):
        """Create the path box, the reindex checkbox and the upload button"""
        with gr.Accordion(label="Directory upload", open=False):
            gr.Markdown(f"Supported file types: {self._supported_file_types_str}")
            self.path = gr.Textbox(
                placeholder="Directory path...", lines=1, max_lines=1, container=False
            )
            with gr.Accordion("Advanced indexing options", open=False), gr.Row():
                self.reindex = gr.Checkbox(
                    value=False, label="Force reindex file", container=False
                )

            self.upload_button = gr.Button("Upload and Index")


class FileIndexPage(BasePage):
    def __init__(self, app, index):
        """Keep the index, derive the page constants and build the UI.

        The UI is not built when KH_DEMO_MODE is set.
        """
        super().__init__(app)
        self._index = index

        types_csv = self._index.config.get("supported_file_types", "")
        self._supported_file_types_str = types_csv
        self._supported_file_types = [ext.strip() for ext in types_csv.split(",")]

        # texts of the selection panel, without and with a selected file
        self.selected_panel_false = "Selected file: (please select above)"
        self.selected_panel_true = "Selected file: {name}"

        self.public_events = [f"onFileIndex{index.id}Changed"]

        # TODO: on_building_ui is not correctly named if it's always called in
        # the constructor
        if KH_DEMO_MODE:
            return
        self.on_building_ui()

    def upload_instruction(self) -> str:
        msgs = []
        if self._supported_file_types:
            msgs.append(f"- Supported file types: {self._supported_file_types_str}")

        if max_file_size := self._index.config.get("max_file_size", 0):
            msgs.append(f"- Maximum file size: {max_file_size} MB")

        if max_number_of_files := self._index.config.get("max_number_of_files", 0):
            msgs.append(f"- The index can have maximum {max_number_of_files} files")

        if msgs:
            return "\n".join(msgs)

        return ""

    def render_file_list(self):
        """Create the widgets of the file list view.

        Created in this order: the name filter, the file table with its state,
        the per-file action buttons (all initially hidden), the selection
        panel, the chunk viewer and the accordion with the bulk actions.
        """
        filter_hint = (
            "(1) Case-insensitive. (2) Search with empty string to show all files."
        )
        self.filter = gr.Textbox(label="Filter by name:", value="", info=filter_hint)

        self.file_list_state = gr.State(value=None)
        columns = ["id", "name", "size", "tokens", "loader", "date_created"]
        self.file_list = gr.DataFrame(
            elem_id="file_list_view",
            headers=columns,
            column_widths=[0, 50, 8, 7, 15, 20],
            interactive=False,
            wrap=False,
        )

        # actions on the selected file
        with gr.Row():
            self.chat_button = gr.Button("Go to Chat", visible=False)
            self.is_zipped_state = gr.State(value=False)
            self.download_single_button = gr.DownloadButton("Download", visible=False)
            self.delete_button = gr.Button("Delete", variant="stop", visible=False)
            self.deselect_button = gr.Button("Close", visible=False)

        with gr.Row() as self.selection_info:
            self.selected_file_id = gr.State(value=None)
            with gr.Column(scale=2):
                self.selected_panel = gr.Markdown(self.selected_panel_false)

        self.chunks = gr.HTML(visible=False)

        # actions on the whole index
        with gr.Accordion("Advance options", open=False), gr.Row():
            if not KH_SSO_ENABLED:
                self.download_all_button = gr.DownloadButton("Download all files")
            self.delete_all_button = gr.Button(
                "Delete all files", variant="stop", visible=True
            )
            self.delete_all_button_confirm = gr.Button(
                "Confirm delete",
                variant="stop",
                visible=False,
            )
            self.delete_all_button_cancel = gr.Button("Cancel", visible=False)

    def render_group_list(self):
        """Create the widgets of the group list view.

        Created in this order: the group table with its state, the row of
        group action buttons, and the initially hidden panel used to edit a
        group (label, name, attached files and save button).
        """
        self.group_list_state = gr.State(value=None)
        columns = ["id", "name", "files", "date_created"]
        self.group_list = gr.DataFrame(
            headers=columns,
            column_widths=[0, 25, 55, 20],
            wrap=False,
            interactive=False,
        )

        # only "Add" is visible until a group gets selected
        with gr.Row():
            self.group_add_button = gr.Button("Add", variant="primary")
            self.group_chat_button = gr.Button("Go to Chat", visible=False)
            self.group_delete_button = gr.Button(
                "Delete", variant="stop", visible=False
            )
            self.group_close_button = gr.Button("Close", visible=False)

        with gr.Column(visible=False) as self._group_info_panel:
            self.selected_group_id = gr.State(value=None)
            self.group_label = gr.Markdown()
            self.group_name = gr.Textbox(
                placeholder="Group name",
                label="Group name",
                max_lines=1,
                lines=1,
            )
            self.group_files = gr.Dropdown(multiselect=True, label="Attached files")
            self.group_save_button = gr.Button("Save", variant="primary")

    def on_building_ui(self):
        """Build the UI of the page.

        A row with two columns: a narrow one (scale 1) holding the upload
        inputs, and a wide one (scale 4) holding the upload progress panel
        and the "Files" / "Groups" tabs.
        """
        with gr.Row():
            # upload inputs; everything lives inside the ``self.upload`` column
            with gr.Column(scale=1), gr.Column() as self.upload:
                with gr.Tab("Upload Files"):
                    self.files = File(
                        show_label=False,
                        container=True,
                        file_count="multiple",
                        file_types=self._supported_file_types,
                    )

                    instruction = self.upload_instruction()
                    if instruction:
                        gr.Markdown(instruction)

                with gr.Tab("Use Web Links"):
                    self.urls = gr.Textbox(lines=8, label="Input web URLs")
                    gr.Markdown("(separated by new line)")

                with gr.Accordion("Advanced indexing options", open=False), gr.Row():
                    self.reindex = gr.Checkbox(
                        label="Force reindex file", value=False, container=False
                    )

                self.upload_button = gr.Button("Upload and Index", variant="primary")

            with gr.Column(scale=4):
                # hidden until an upload is started
                with gr.Column(visible=False) as self.upload_progress_panel:
                    gr.Markdown("## Upload Progress")
                    with gr.Row():
                        self.upload_result = gr.Textbox(
                            label="Upload result", lines=1, max_lines=20
                        )
                        self.upload_info = gr.Textbox(
                            label="Upload info", lines=1, max_lines=20
                        )
                    self.btn_close_upload_progress_panel = gr.Button(
                        "Clear Upload Info and Close",
                        elem_classes=["right-button"],
                        variant="secondary",
                    )

                with gr.Tab("Files"):
                    self.render_file_list()

                with gr.Tab("Groups"):
                    self.render_group_list()

    def on_subscribe_public_events(self):
        """Subscribe this page's handlers to the app's public events.

        Nothing is subscribed in demo mode. Otherwise the group file dropdown
        is refreshed whenever this index's changed event fires and, when user
        management is enabled, the listings are refreshed on sign in and the
        file list on sign out.
        """
        if KH_DEMO_MODE:
            return

        def quiet(fn, inputs, outputs):
            """Return an event definition that hides the progress indicator."""
            return {
                "fn": fn,
                "inputs": inputs,
                "outputs": outputs,
                "show_progress": "hidden",
            }

        subscribe = self._app.subscribe_event

        subscribe(
            name=f"onFileIndex{self._index.id}Changed",
            definition=quiet(
                self.list_file_names, [self.file_list_state], [self.group_files]
            ),
        )

        if not self._app.f_user_management:
            return

        subscribe(
            name="onSignIn",
            definition=quiet(
                self.list_file,
                [self._app.user_id],
                [self.file_list_state, self.file_list],
            ),
        )
        subscribe(
            name="onSignIn",
            definition=quiet(
                self.list_group,
                [self._app.user_id, self.file_list_state],
                [self.group_list_state, self.group_list],
            ),
        )
        subscribe(
            name="onSignIn",
            definition=quiet(
                self.list_file_names, [self.file_list_state], [self.group_files]
            ),
        )
        subscribe(
            name="onSignOut",
            definition=quiet(
                self.list_file,
                [self._app.user_id],
                [self.file_list_state, self.file_list],
            ),
        )

    def file_selected(self, file_id):
        """Render the chunks of a file and toggle the per-file widgets.

        Args:
            file_id: id of the selected file, or None when nothing is selected.

        Returns:
            A tuple of five ``gr.update``: the first carries the HTML of all
            chunks (empty without a selection); all five set ``visible`` to
            whether a file is selected.
        """
        has_selection = file_id is not None
        rendered = []

        if has_selection:
            # look up the documents (chunks) linked to this file
            Index = self._index._resources["Index"]
            with Session(engine) as session:
                rows = session.execute(
                    select(Index).where(
                        Index.source_id == file_id,
                        Index.relation_type == "document",
                    )
                )
                doc_ids = [relation.target_id for (relation,) in rows]
                # chunks without a page label are sorted last
                docs = sorted(
                    self._index._docstore.get(doc_ids),
                    key=lambda x: x.metadata.get("page_label", float("inf")),
                )
                total = len(docs)

                for position, doc in enumerate(docs, start=1):
                    # the header shows at most the first 50 characters
                    if len(doc.text) > 50:
                        title = html.escape(f"{doc.text[:50]}...")
                    else:
                        title = html.escape(doc.text)

                    kind = doc.metadata.get("type", "text")
                    if kind == "text":
                        body = html.escape(doc.text)
                    elif kind == "table":
                        body = Render.table(doc.text)
                    elif kind == "image":
                        body = Render.image(
                            url=doc.metadata.get("image_origin", ""), text=doc.text
                        )
                    else:
                        body = ""

                    prefix = f"[{position}/{total}]"
                    if doc.metadata.get("page_label"):
                        prefix += f" [Page {doc.metadata['page_label']}]"

                    rendered.append(
                        Render.collapsible(header=f"{prefix} {title}", content=body)
                    )

        return (
            gr.update(value="".join(rendered), visible=has_selection),
            gr.update(visible=has_selection),
            gr.update(visible=has_selection),
            gr.update(visible=has_selection),
            gr.update(visible=has_selection),
        )

    def delete_event(self, file_id):
        """Delete a file together with its index entries and stored chunks.

        The source row and every index row of ``file_id`` are removed in one
        database transaction; afterwards the referenced vectors and documents
        are deleted from the vector store and the docstore.

        Args:
            file_id: id of the file (source) to delete.

        Returns:
            ``(None, self.selected_panel_false)``.
        """
        resources = self._index._resources
        file_name = ""
        vector_ids = []
        document_ids = []

        with Session(engine) as session:
            source_row = session.execute(
                select(resources["Source"]).where(resources["Source"].id == file_id)
            ).first()
            if source_row:
                file_name = source_row[0].name
                session.delete(source_row[0])

            index_rows = session.execute(
                select(resources["Index"]).where(
                    resources["Index"].source_id == file_id
                )
            ).all()
            for row in index_rows:
                relation = row[0]
                # remember what has to be purged from the stores afterwards
                if relation.relation_type == "vector":
                    vector_ids.append(relation.target_id)
                elif relation.relation_type == "document":
                    document_ids.append(relation.target_id)
                session.delete(relation)
            session.commit()

        if vector_ids:
            self._index._vs.delete(vector_ids)
        self._index._docstore.delete(document_ids)

        gr.Info(f"File {file_name} has been deleted")

        return None, self.selected_panel_false

    def delete_no_event(self):
        """Return two updates: make the first output visible, hide the second."""
        show = gr.update(visible=True)
        hide = gr.update(visible=False)
        return (show, hide)

    def download_single_file(self, is_zipped_state, file_id):
        """Toggle the single-file download button, zipping the file's outputs.

        When ``is_zipped_state`` is falsy, every entry of the chunks and
        markdown output directories whose name contains the stem of the
        source's filename is written to ``<KH_ZIP_OUTPUT_DIR>/<stem>.zip`` and
        the returned button points at that archive. When it is truthy, the
        button is reset and no archive is built.

        Args:
            is_zipped_state: whether the button currently offers an archive.
            file_id: id of the selected file (source).

        Returns:
            ``(not is_zipped_state, gr.DownloadButton)``.

        Raises:
            gr.Error: if no source with ``file_id`` exists.
        """
        Source = self._index._resources["Source"]
        with Session(engine) as session:
            source = session.execute(
                select(Source).where(Source.id == file_id)
            ).first()
        if not source:
            # without this guard the code below failed with UnboundLocalError
            raise gr.Error("The selected file was not found in the index.")

        if is_zipped_state:
            # this click only resets the button; the archive is not needed
            return not is_zipped_state, gr.DownloadButton(label="Download", value=None)

        stem = Path(source[0].name).stem
        # NOTE: substring match, so outputs of other files whose names contain
        # this stem are included as well
        zip_files = [
            os.path.join(directory, file_name)
            for directory in (
                flowsettings.KH_CHUNKS_OUTPUT_DIR,
                flowsettings.KH_MARKDOWN_OUTPUT_DIR,
            )
            for file_name in os.listdir(directory)
            if stem in file_name
        ]

        zip_file_path = os.path.join(flowsettings.KH_ZIP_OUTPUT_DIR, stem)
        archive_path = f"{zip_file_path}.zip"
        with zipfile.ZipFile(archive_path, "w") as archive:
            for file in zip_files:
                archive.write(file, arcname=os.path.basename(file))

        new_button = gr.DownloadButton(label=DOWNLOAD_MESSAGE, value=archive_path)
        return not is_zipped_state, new_button

    def download_single_file_simple(self, is_zipped_state, file_html, file_id):
        """Toggle the single-file download button, exporting the chunks as HTML.

        When ``is_zipped_state`` is falsy, ``file_html`` is written to
        ``<KH_ZIP_OUTPUT_DIR>/<stem>.html`` (stem of the source's filename) and
        the returned button points at that file. When it is truthy, the button
        is reset and nothing is written.

        Args:
            is_zipped_state: whether the button currently offers a file.
            file_html: HTML content to export.
            file_id: id of the selected file (source).

        Returns:
            ``(not is_zipped_state, gr.DownloadButton)``.

        Raises:
            gr.Error: if no source with ``file_id`` exists.
        """
        Source = self._index._resources["Source"]
        with Session(engine) as session:
            source = session.execute(
                select(Source).where(Source.id == file_id)
            ).first()
        if not source:
            # without this guard the code below failed with UnboundLocalError
            raise gr.Error("The selected file was not found in the index.")

        if is_zipped_state:
            # this click only resets the button; the export is not needed
            return not is_zipped_state, gr.DownloadButton(label="Download", value=None)

        target_file_name = Path(source[0].name)
        output_file_path = os.path.join(
            flowsettings.KH_ZIP_OUTPUT_DIR, target_file_name.stem + ".html"
        )
        # explicit UTF-8: the platform default encoding may be unable to
        # represent the text of the chunks
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(file_html)

        new_button = gr.DownloadButton(
            label=DOWNLOAD_MESSAGE,
            value=output_file_path,
        )
        return not is_zipped_state, new_button

    def download_all_files(self):
        """Zip the chunks and markdown output directories into ``all.zip``.

        Returns:
            A ``gr.DownloadButton`` pointing at the archive written to
            ``KH_ZIP_OUTPUT_DIR``.

        Raises:
            gr.Error: if the index is configured as private.
        """
        if self._index.config.get("private", False):
            raise gr.Error("This feature is not available for private collection.")

        members = []
        for directory in (
            flowsettings.KH_CHUNKS_OUTPUT_DIR,
            flowsettings.KH_MARKDOWN_OUTPUT_DIR,
        ):
            members.extend(
                os.path.join(directory, entry) for entry in os.listdir(directory)
            )

        zip_file_path = os.path.join(flowsettings.KH_ZIP_OUTPUT_DIR, "all")
        archive_path = f"{zip_file_path}.zip"
        with zipfile.ZipFile(archive_path, "w") as archive:
            for member in members:
                # members are stored flat, under their bare filename
                archive.write(member, arcname=Path(member).name)

        return gr.DownloadButton(label=DOWNLOAD_MESSAGE, value=archive_path)

    def delete_all_files(self, file_list):
        for file_id in file_list.id.values:
            self.delete_event(file_id)

    def set_file_id_selector(self, selected_file_id):
        """Return the id, the "select" mode and a Tabs switched to "chat-tab"."""
        chat_tab = gr.Tabs(selected="chat-tab")
        return [selected_file_id, "select", chat_tab]

    def show_delete_all_confirm(self, file_list):
        """Swap the "delete all" button for its confirm / cancel pair.

        Args:
            file_list: the table of files, with an ``id`` column.

        Returns:
            Three ``gr.update`` for the delete-all, confirm and cancel
            buttons. When there is nothing to delete, a notice is shown and
            the delete-all button stays visible; otherwise it is hidden and
            the other two are shown.
        """
        row_count = len(file_list)
        # when the list of files is empty it shows a single line with id equal to -
        only_placeholder = row_count == 1 and file_list.id.values[0] == "-"

        if not (row_count == 0 or only_placeholder):
            return [
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=True),
            ]

        gr.Info("No file to delete")
        return [
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
        ]

    def on_register_quick_uploads(self):
        """Wire the chat page's quick file / URL upload widgets to this index.

        Only the index whose id is 1 registers these events. The file upload
        chain is skipped in demo mode, the URL chain is always registered.
        Any exception raised while wiring is printed and otherwise ignored.
        """
        try:
            # quick file upload event registration of first Index only
            if self._index.id == 1:
                # receives the output of the indexing functions below
                self.quick_upload_state = gr.State(value=[])
                print("Setting up quick upload event")

                # override indexing function from chat page
                self._app.chat_page.first_indexing_url_fn = (
                    self.index_fn_url_with_default_loaders
                )

                if not KH_DEMO_MODE:
                    # file chain: show a waiting message, index the uploaded
                    # files, then (on success only) clear the upload widget and
                    # set the first indices input to "select"
                    quickUploadedEvent = (
                        self._app.chat_page.quick_file_upload.upload(
                            fn=lambda: gr.update(
                                value="Please wait for the indexing process "
                                "to complete before adding your question."
                            ),
                            outputs=self._app.chat_page.quick_file_upload_status,
                        )
                        .then(
                            fn=self.index_fn_file_with_default_loaders,
                            inputs=[
                                self._app.chat_page.quick_file_upload,
                                gr.State(value=False),
                                self._app.settings_state,
                                self._app.user_id,
                            ],
                            outputs=self.quick_upload_state,
                            concurrency_limit=10,
                        )
                        .success(
                            fn=lambda: [
                                gr.update(value=None),
                                gr.update(value="select"),
                            ],
                            outputs=[
                                self._app.chat_page.quick_file_upload,
                                self._app.chat_page._indices_input[0],
                            ],
                        )
                    )
                    # run every handler registered for this index's changed event
                    for event in self._app.get_event(
                        f"onFileIndex{self._index.id}Changed"
                    ):
                        quickUploadedEvent = quickUploadedEvent.then(**event)

                    # copy the indexing output into the second indices input,
                    # report completion, refresh the file list and finally run
                    # the chat-input JS snippet
                    quickUploadedEvent = (
                        quickUploadedEvent.success(
                            fn=lambda x: x,
                            inputs=self.quick_upload_state,
                            outputs=self._app.chat_page._indices_input[1],
                        )
                        .then(
                            fn=lambda: gr.update(value="Indexing completed."),
                            outputs=self._app.chat_page.quick_file_upload_status,
                        )
                        .then(
                            fn=self.list_file,
                            inputs=[self._app.user_id, self.filter],
                            outputs=[self.file_list_state, self.file_list],
                            concurrency_limit=20,
                        )
                        .then(
                            fn=lambda: True,
                            inputs=None,
                            outputs=None,
                            js=chat_input_focus_js_with_submit,
                        )
                    )

                # URL chain: same steps as the file chain, triggered by
                # submitting the quick URL box
                quickURLUploadedEvent = (
                    self._app.chat_page.quick_urls.submit(
                        fn=lambda: gr.update(
                            value="Please wait for the indexing process "
                            "to complete before adding your question."
                        ),
                        outputs=self._app.chat_page.quick_file_upload_status,
                    )
                    .then(
                        fn=self.index_fn_url_with_default_loaders,
                        inputs=[
                            self._app.chat_page.quick_urls,
                            gr.State(value=False),
                            self._app.settings_state,
                            self._app.user_id,
                        ],
                        outputs=self.quick_upload_state,
                        concurrency_limit=10,
                    )
                    .success(
                        fn=lambda: [
                            gr.update(value=None),
                            gr.update(value="select"),
                        ],
                        outputs=[
                            self._app.chat_page.quick_urls,
                            self._app.chat_page._indices_input[0],
                        ],
                    )
                )
                for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
                    quickURLUploadedEvent = quickURLUploadedEvent.then(**event)

                quickURLUploadedEvent = quickURLUploadedEvent.success(
                    fn=lambda x: x,
                    inputs=self.quick_upload_state,
                    outputs=self._app.chat_page._indices_input[1],
                ).then(
                    fn=lambda: gr.update(value="Indexing completed."),
                    outputs=self._app.chat_page.quick_file_upload_status,
                )

                # the file list widgets are only built outside of demo mode
                if not KH_DEMO_MODE:
                    quickURLUploadedEvent = quickURLUploadedEvent.then(
                        fn=self.list_file,
                        inputs=[self._app.user_id, self.filter],
                        outputs=[self.file_list_state, self.file_list],
                        concurrency_limit=20,
                    )

                quickURLUploadedEvent = quickURLUploadedEvent.then(
                    fn=lambda: True,
                    inputs=None,
                    outputs=None,
                    js=chat_input_focus_js_with_submit,
                )

        except Exception as e:
            # NOTE(review): failures are only printed, so the quick upload may
            # end up partially wired - presumably a deliberate best effort
            print(e)

    def on_register_events(self):
        """Register all events to the app

        The quick upload events are registered first; in demo mode nothing
        else is registered. The rest wires the widgets of this page: file
        deletion / deselection / download, "delete all" with confirmation,
        upload and indexing, file and group selection, filtering, and the
        group add / save / delete / close actions.
        """
        self.on_register_quick_uploads()

        if KH_DEMO_MODE:
            return

        # delete the selected file, clear the selection, refresh the list and
        # re-run file_selected so the per-file widgets follow the (now empty)
        # selection
        onDeleted = (
            self.delete_button.click(
                fn=self.delete_event,
                inputs=[self.selected_file_id],
                outputs=None,
            )
            .then(
                fn=lambda: (None, self.selected_panel_false),
                inputs=[],
                outputs=[self.selected_file_id, self.selected_panel],
                show_progress="hidden",
            )
            .then(
                fn=self.list_file,
                inputs=[self._app.user_id, self.filter],
                outputs=[self.file_list_state, self.file_list],
            )
            .then(
                fn=self.file_selected,
                inputs=[self.selected_file_id],
                outputs=[
                    self.chunks,
                    self.deselect_button,
                    self.delete_button,
                    self.download_single_button,
                    self.chat_button,
                ],
                show_progress="hidden",
            )
        )
        # then run every handler registered for this index's changed event
        for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
            onDeleted = onDeleted.then(**event)

        # "Close": clear the selection, then update the per-file widgets
        self.deselect_button.click(
            fn=lambda: (None, self.selected_panel_false),
            inputs=[],
            outputs=[self.selected_file_id, self.selected_panel],
            show_progress="hidden",
        ).then(
            fn=self.file_selected,
            inputs=[self.selected_file_id],
            outputs=[
                self.chunks,
                self.deselect_button,
                self.delete_button,
                self.download_single_button,
                self.chat_button,
            ],
            show_progress="hidden",
        )

        # "Go to Chat": push the selected file into the index selector and
        # switch tabs
        self.chat_button.click(
            fn=self.set_file_id_selector,
            inputs=[self.selected_file_id],
            outputs=[
                self._index.get_selector_component_ui().selector,
                self._index.get_selector_component_ui().mode,
                self._app.tabs,
            ],
        )

        # the "download all" button only exists when SSO is disabled
        if not KH_SSO_ENABLED:
            self.download_all_button.click(
                fn=self.download_all_files,
                inputs=[],
                outputs=self.download_all_button,
                show_progress="hidden",
            )

        # "Delete all files": ask for confirmation first
        self.delete_all_button.click(
            self.show_delete_all_confirm,
            [self.file_list],
            [
                self.delete_all_button,
                self.delete_all_button_confirm,
                self.delete_all_button_cancel,
            ],
        )
        # cancel: restore the delete-all button, hide confirm / cancel
        self.delete_all_button_cancel.click(
            lambda: [
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ],
            None,
            [
                self.delete_all_button,
                self.delete_all_button_confirm,
                self.delete_all_button_cancel,
            ],
        )

        # confirm: delete every listed file, refresh the list, then restore
        # the delete-all button
        self.delete_all_button_confirm.click(
            fn=self.delete_all_files,
            inputs=[self.file_list],
            outputs=[],
            show_progress="hidden",
        ).then(
            fn=self.list_file,
            inputs=[self._app.user_id, self.filter],
            outputs=[self.file_list_state, self.file_list],
        ).then(
            lambda: [
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ],
            None,
            [
                self.delete_all_button,
                self.delete_all_button_confirm,
                self.delete_all_button_cancel,
            ],
        )

        # single-file download: a zip of the outputs without SSO, an HTML
        # export of the rendered chunks with SSO
        if not KH_SSO_ENABLED:
            self.download_single_button.click(
                fn=self.download_single_file,
                inputs=[self.is_zipped_state, self.selected_file_id],
                outputs=[self.is_zipped_state, self.download_single_button],
                show_progress="hidden",
            )
        else:
            self.download_single_button.click(
                fn=self.download_single_file_simple,
                inputs=[self.is_zipped_state, self.chunks, self.selected_file_id],
                outputs=[self.is_zipped_state, self.download_single_button],
                show_progress="hidden",
            )

        # upload: show the progress panel, index the files / URLs, then empty
        # the URL box
        onUploaded = (
            self.upload_button.click(
                fn=lambda: gr.update(visible=True),
                outputs=[self.upload_progress_panel],
            )
            .then(
                fn=self.index_fn,
                inputs=[
                    self.files,
                    self.urls,
                    self.reindex,
                    self._app.settings_state,
                    self._app.user_id,
                ],
                outputs=[self.upload_result, self.upload_info],
                concurrency_limit=20,
            )
            .then(
                fn=lambda: gr.update(value=""),
                outputs=[self.urls],
            )
        )

        # after an upload: refresh the file list, then run the changed events
        uploadedEvent = onUploaded.then(
            fn=self.list_file,
            inputs=[self._app.user_id, self.filter],
            outputs=[self.file_list_state, self.file_list],
            concurrency_limit=20,
        )
        for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
            uploadedEvent = uploadedEvent.then(**event)

        # the file picker is cleared only when the upload chain succeeded
        _ = onUploaded.success(
            fn=lambda: None,
            outputs=[self.files],
        )

        # hide the progress panel and empty both of its text boxes
        self.btn_close_upload_progress_panel.click(
            fn=lambda: (gr.update(visible=False), "", ""),
            outputs=[self.upload_progress_panel, self.upload_result, self.upload_info],
        )

        # selecting a row of the file table: record the selection, then show
        # its chunks and the per-file widgets
        self.file_list.select(
            fn=self.interact_file_list,
            inputs=[self.file_list],
            outputs=[self.selected_file_id, self.selected_panel],
            show_progress="hidden",
        ).then(
            fn=self.file_selected,
            inputs=[self.selected_file_id],
            outputs=[
                self.chunks,
                self.deselect_button,
                self.delete_button,
                self.download_single_button,
                self.chat_button,
            ],
            show_progress="hidden",
        )

        # selecting a row of the group table: fill the group panel, then show
        # it with the close / delete / chat buttons and hide "Add"
        self.group_list.select(
            fn=self.interact_group_list,
            inputs=[self.group_list_state],
            outputs=[
                self.group_label,
                self.selected_group_id,
                self.group_name,
                self.group_files,
            ],
            show_progress="hidden",
        ).then(
            fn=lambda: (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(visible=True),
            ),
            outputs=[
                self._group_info_panel,
                self.group_add_button,
                self.group_close_button,
                self.group_delete_button,
                self.group_chat_button,
            ],
        )

        # submitting the filter box re-lists the files
        self.filter.submit(
            fn=self.list_file,
            inputs=[self._app.user_id, self.filter],
            outputs=[self.file_list_state, self.file_list],
            show_progress="hidden",
        )

        # "Add": open an empty group panel with no group selected
        self.group_add_button.click(
            fn=lambda: [
                gr.update(visible=False),
                gr.update(value="### Add new group"),
                gr.update(visible=True),
                gr.update(value=""),
                gr.update(value=[]),
                None,
            ],
            outputs=[
                self.group_add_button,
                self.group_label,
                self._group_info_panel,
                self.group_name,
                self.group_files,
                self.selected_group_id,
            ],
        )

        # "Go to Chat" for a group: push the group into the index selector
        # and switch tabs
        self.group_chat_button.click(
            fn=self.set_group_id_selector,
            inputs=[self.selected_group_id],
            outputs=[
                self._index.get_selector_component_ui().selector,
                self._index.get_selector_component_ui().mode,
                self._app.tabs,
            ],
        )

        # shared event definition: show "Add", hide the group panel and its
        # buttons, clear the selected group; reused after close / save / delete
        onGroupClosedEvent = {
            "fn": lambda: [
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(visible=False),
                None,
            ],
            "outputs": [
                self.group_add_button,
                self._group_info_panel,
                self.group_close_button,
                self.group_delete_button,
                self.group_chat_button,
                self.selected_group_id,
            ],
        }
        self.group_close_button.click(**onGroupClosedEvent)
        # save: store the group, re-list the groups, close the panel
        onGroupSaved = (
            self.group_save_button.click(
                fn=self.save_group,
                inputs=[
                    self.selected_group_id,
                    self.group_name,
                    self.group_files,
                    self._app.user_id,
                ],
            )
            .then(
                self.list_group,
                inputs=[self._app.user_id, self.file_list_state],
                outputs=[self.group_list_state, self.group_list],
            )
            .then(**onGroupClosedEvent)
        )
        # delete: remove the group, re-list the groups, close the panel
        onGroupDeleted = (
            self.group_delete_button.click(
                fn=self.delete_group,
                inputs=[self.selected_group_id],
            )
            .then(
                self.list_group,
                inputs=[self._app.user_id, self.file_list_state],
                outputs=[self.group_list_state, self.group_list],
            )
            .then(**onGroupClosedEvent)
        )

        # group changes also run the handlers of this index's changed event
        for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
            onGroupDeleted = onGroupDeleted.then(**event)
            onGroupSaved = onGroupSaved.then(**event)

    def _on_app_created(self):
        """Register the initial data load that runs when the app is loaded.

        Nothing is registered in demo mode. Otherwise three steps are chained:
        `list_file`, then `list_group`, then `list_file_names` (which refreshes
        the choices of `group_files`).
        """
        if KH_DEMO_MODE:
            return

        # 1. populate the file table
        files_loaded = self._app.app.load(
            self.list_file,
            inputs=[self._app.user_id, self.filter],
            outputs=[self.file_list_state, self.file_list],
        )

        # 2. populate the group table; it needs the file state to show file names
        groups_loaded = files_loaded.then(
            self.list_group,
            inputs=[self._app.user_id, self.file_list_state],
            outputs=[self.group_list_state, self.group_list],
        )

        # 3. offer the loaded files as choices in the group file picker
        groups_loaded.then(
            self.list_file_names,
            inputs=[self.file_list_state],
            outputs=[self.group_files],
        )

    def _may_extract_zip(self, files, zip_dir: str):
        """Extract uploaded zip archives and collect the supported files inside.

        Args:
            files: paths of the uploaded files; archives and regular files may
                be mixed.
            zip_dir: working directory for the extraction. It is removed at the
                start of every call so files of a previous upload are not
                picked up again.

        Returns:
            A tuple `(remaining_files, errors)`. `remaining_files` holds the
            non-zip inputs followed by every extracted file whose extension is
            in `self._supported_file_types`. `errors` holds one message per
            archive that could not be read.
        """
        zip_files = [file for file in files if file.endswith(".zip")]
        # Use the exact same ".zip" test as above: a name that merely ends with
        # "zip" (without the dot) is a regular file and must not be dropped.
        remaining_files = [file for file in files if not file.endswith(".zip")]
        errors: list[str] = []

        # Clean-up <zip_dir> before unzip to remove old files
        shutil.rmtree(zip_dir, ignore_errors=True)

        # Unzip
        for zip_file in zip_files:
            # Prepare new zip output dir, separated for each files
            basename = os.path.splitext(os.path.basename(zip_file))[0]
            zip_out_dir = os.path.join(zip_dir, basename)
            os.makedirs(zip_out_dir, exist_ok=True)

            try:
                with zipfile.ZipFile(zip_file, "r") as zip_ref:
                    zip_ref.extractall(zip_out_dir)
            except zipfile.BadZipFile:
                # report the broken archive to the caller instead of crashing
                errors.append(f"Invalid zip file: {os.path.basename(zip_file)}")

        n_zip_file = 0
        for root, _dirs, filenames in os.walk(zip_dir):
            for filename in filenames:
                ext = os.path.splitext(filename)[1]

                # only allow supported file-types ( not zip )
                if ext != ".zip" and ext in self._supported_file_types:
                    remaining_files.append(os.path.join(root, filename))
                    n_zip_file += 1

        if n_zip_file > 0:
            print(f"Update zip files: {n_zip_file}")

        return remaining_files, errors

    def index_fn(
        self, files, urls, reindex: bool, settings, user_id
    ) -> Generator[tuple[str, str], None, None]:
        """Upload and index the files

        Args:
            files: the list of files to be uploaded
            urls: newline-separated web URLs to be indexed; when given, they are
                indexed instead of `files`
            reindex: whether to reindex the files
            settings: the settings of the app
            user_id: the id of the user who runs the indexing

        Yields:
            `(index_log, debug_log)`: the accumulated per-file status lines and
            the accumulated debug lines, each joined by newlines.

        Returns:
            The `results` value returned by the indexing pipeline stream. The
            generator returns None when validation fails, when there is nothing
            to index, or when the pipeline raises.
        """
        if urls:
            files = [it.strip() for it in urls.split("\n")]
            errors = self.validate_urls(files)
        else:
            if not files:
                gr.Info("No uploaded file")
                yield "", ""
                return
            files, unzip_errors = self._may_extract_zip(
                files, flowsettings.KH_ZIP_INPUT_DIR
            )
            errors = self.validate_files(files)
            errors.extend(unzip_errors)

        if errors:
            gr.Warning(", ".join(errors))
            yield "", ""
            return

        gr.Info(f"Start indexing {len(files)} files...")

        # get the pipeline
        indexing_pipeline = self._index.get_indexing_pipeline(settings, user_id)

        outputs, debugs = [], []
        # stream the output
        output_stream = indexing_pipeline.stream(files, reindex=reindex)
        try:
            # iterate manually: the stream's return value is only available on
            # the StopIteration raised by next()
            while True:
                response = next(output_stream)
                if response is None:
                    continue
                if response.channel == "index":
                    if response.content["status"] == "success":
                        outputs.append(f"\u2705 | {response.content['file_name']}")
                    elif response.content["status"] == "failed":
                        outputs.append(
                            f"\u274c | {response.content['file_name']}: "
                            f"{response.content['message']}"
                        )
                elif response.channel == "debug":
                    debugs.append(response.text)
                yield "\n".join(outputs), "\n".join(debugs)
        except StopIteration as e:
            results, index_errors, _docs = e.value
        except Exception as e:
            debugs.append(f"Error: {e}")
            yield "\n".join(outputs), "\n".join(debugs)
            return

        n_successes = sum(1 for item in results if item)
        if n_successes:
            gr.Info(f"Successfully index {n_successes} files")
        # Count the errors reported by the pipeline. The validation `errors`
        # list is always empty here because a non-empty list returned early.
        n_errors = sum(1 for item in index_errors if item)
        if n_errors:
            gr.Warning(f"Have errors for {n_errors} files")

        return results

    def index_fn_file_with_default_loaders(
        self, files, reindex: bool, settings, user_id
    ) -> list["str"]:
        """Function for quick upload with default loaders

        Files for which the indexing pipeline already reports an id are not
        indexed again; the remaining ones are indexed with `reader_mode` set to
        "default" and `quick_index_mode` enabled.

        Args:
            files: the list of files to be uploaded
            reindex: whether to reindex the files
            settings: the settings of the app (a copy is modified, not the input)
            user_id: the id of the user who runs the indexing

        Returns:
            The ids of the already-indexed files followed by whatever
            `index_fn` returned for the newly indexed ones.
        """
        print("Overriding with default loaders")
        exist_ids = []
        to_process_files = []
        for str_file_path in files:
            file_path = Path(str(str_file_path))
            exist_id = (
                self._index.get_indexing_pipeline(settings, user_id)
                .route(file_path)
                .get_id_if_exists(file_path)
            )
            if exist_id:
                exist_ids.append(exist_id)
            else:
                to_process_files.append(str_file_path)

        returned_ids = []
        settings = deepcopy(settings)
        settings[f"index.options.{self._index.id}.reader_mode"] = "default"
        settings[f"index.options.{self._index.id}.quick_index_mode"] = True
        if to_process_files:
            _iter = self.index_fn(to_process_files, [], reindex, settings, user_id)
            try:
                # drain the progress updates; only the return value matters here
                while True:
                    next(_iter)
            except StopIteration as e:
                # index_fn finishes with None when validation or indexing
                # failed; fall back to an empty list so the concatenation below
                # does not raise TypeError
                returned_ids = e.value or []

        return exist_ids + returned_ids

    def index_fn_url_with_default_loaders(
        self,
        urls,
        reindex: bool,
        settings,
        user_id,
        request: gr.Request,
    ):
        """Quick-index URLs with the default loaders

        In demo mode the call is rate limited, every URL must be an arXiv URL,
        the papers are downloaded as PDF files and indexed as files (files the
        pipeline already knows are skipped). Otherwise the URLs are handed to
        `index_fn` directly.

        Args:
            urls: newline-separated URLs to index
            reindex: whether to reindex the sources
            settings: the settings of the app (a copy is modified, not the input)
            user_id: the id of the user who runs the indexing
            request: the incoming request, used for rate limiting in demo mode

        Returns:
            The list of ids: already-known ones first, then whatever `index_fn`
            returned. Empty when nothing was indexed.

        Raises:
            ValueError: in demo mode, when any URL is not an arXiv URL
        """
        if KH_DEMO_MODE:
            check_rate_limit("file_upload", request)

        settings = deepcopy(settings)
        settings[f"index.options.{self._index.id}.reader_mode"] = "default"
        settings[f"index.options.{self._index.id}.quick_index_mode"] = True

        def run_index(files, raw_urls) -> list:
            """Drain `index_fn` and return its result as a list."""
            stream = self.index_fn(files, raw_urls, reindex, settings, user_id)
            try:
                while True:
                    next(stream)
            except StopIteration as stop:
                # index_fn finishes with None when validation or indexing
                # failed; normalize so callers always get a list
                return stop.value or []

        if not KH_DEMO_MODE:
            return run_index([], urls) if urls else []

        urls_splitted = urls.split("\n")
        if not all(is_arxiv_url(url) for url in urls_splitted):
            raise ValueError("All URLs must be valid arXiv URLs")

        output_files = [
            download_arxiv_pdf(
                url,
                output_path=os.environ.get("GRADIO_TEMP_DIR", "/tmp"),
            )
            for url in urls_splitted
        ]

        exist_ids = []
        to_process_files = []
        for str_file_path in output_files:
            file_path = Path(str_file_path)
            exist_id = (
                self._index.get_indexing_pipeline(settings, user_id)
                .route(file_path)
                .get_id_if_exists(file_path)
            )
            if exist_id:
                exist_ids.append(exist_id)
            else:
                to_process_files.append(str_file_path)

        new_ids = run_index(to_process_files, []) if to_process_files else []
        return exist_ids + new_ids

    def index_files_from_dir(
        self, folder_path, reindex, settings, user_id
    ) -> Generator[tuple[str, str], None, None]:
        """Index every file found under a folder

        All paths matching `**/*.*` below `folder_path` are collected, filtered
        by the include/exclude patterns and passed to `index_fn`. With an empty
        `folder_path` a single `("", "")` is yielded and nothing is indexed.

        Design note: this should be constructable by users, i.e. users can build
        their own index.
        Build your own index:
            - Input:
                - Type: based on the type, there is a range of panels the user
                can select from:
                    - Panels
                    - Data sources
                    - Include patterns
                    - Exclude patterns
                - Indexing functions. Can be a list of indexing functions. Each
                declared function is:
                    - Condition (the source that will go through this function)
                    - Function (the pipeline that runs this)
            - Output: the artifacts that we wish to
                - Build the UI
                    - Upload page: fixed standard, based on the type
                    - Read page: fixed standard, based on the type
                    - Delete page: fixed standard, based on the type
                - Build the index function
                - Build the chat function

        Step:
            1. Decide on the artifacts
            2. Implement the transformation from artifacts to UI
        """
        if not folder_path:
            yield "", ""
            return

        import fnmatch
        from pathlib import Path

        include_patterns: list[str] = []
        exclude_patterns: list[str] = ["*.png", "*.gif", "*/.*"]
        if include_patterns and exclude_patterns:
            raise ValueError("Cannot have both include and exclude patterns")

        def anchor(pattern: str) -> str:
            """Turn a relative pattern into one rooted at the working directory."""
            if pattern.startswith("*"):
                return str(Path.cwd() / "**" / pattern)
            return str(Path.cwd() / pattern.strip("/"))

        # clean up the include and exclude patterns
        # NOTE(review): patterns are anchored at the current working directory,
        # not at `folder_path` -- confirm this is intended
        include_patterns = [anchor(pattern) for pattern in include_patterns]
        exclude_patterns = [anchor(pattern) for pattern in exclude_patterns]

        # get the files
        files: list[str] = [str(p) for p in Path(folder_path).glob("**/*.*")]
        for pattern in include_patterns:
            files = fnmatch.filter(names=files, pat=pattern)
        for pattern in exclude_patterns:
            files = [f for f in files if not fnmatch.fnmatch(name=f, pat=pattern)]

        yield from self.index_fn(files, [], reindex, settings, user_id)

    def format_size_human_readable(self, num: float | str, suffix="B"):
        try:
            num = float(num)
        except ValueError:
            return num

        for unit in ("", "K", "M", "G", "T", "P", "E", "Z"):
            if abs(num) < 1024.0:
                return f"{num:3.0f}{unit}{suffix}"
            num /= 1024.0
        return f"{num:.0f}Yi{suffix}"

    def list_file(self, user_id, name_pattern=""):
        """List the indexed files as raw records and as a display table

        Args:
            user_id: the current user; None means not signed in
            name_pattern: optional case-insensitive substring the file name must
                contain

        Returns:
            `(results, file_list)`: the list of file records and a DataFrame for
            display. When there is nothing to show the DataFrame holds a single
            placeholder row filled with "-".
        """

        def placeholder_table():
            return pd.DataFrame.from_records(
                [
                    {
                        "id": "-",
                        "name": "-",
                        "size": "-",
                        "tokens": "-",
                        "loader": "-",
                        "date_created": "-",
                    }
                ]
            )

        if user_id is None:
            # not signed in
            return [], placeholder_table()

        Source = self._index._resources["Source"]
        results = []
        with Session(engine) as session:
            query = select(Source)
            if self._index.config.get("private", False):
                # private index: only the files owned by this user
                query = query.where(Source.user == user_id)
            if name_pattern:
                query = query.where(Source.name.ilike(f"%{name_pattern}%"))

            for row in session.execute(query).all():
                source = row[0]
                results.append(
                    {
                        "id": source.id,
                        "name": source.name,
                        "size": self.format_size_human_readable(source.size),
                        "tokens": self.format_size_human_readable(
                            source.note.get("tokens", "-"), suffix=""
                        ),
                        "loader": source.note.get("loader", "-"),
                        "date_created": source.date_created.strftime(
                            "%Y-%m-%d %H:%M:%S"
                        ),
                    }
                )

        if not results:
            return results, placeholder_table()

        return results, pd.DataFrame.from_records(results)

    def list_file_names(self, file_list_state):
        """Build a choices update of `(name, id)` pairs from the file records

        An empty or missing `file_list_state` results in an empty choice list.
        """
        choices = [(entry["name"], entry["id"]) for entry in file_list_state or []]
        return gr.update(choices=choices)

    def list_group(self, user_id, file_list):
        """List the file groups as raw records and as a display table

        Args:
            user_id: the current user; None means not signed in
            file_list: file records (with "id" and "name"), used to show file
                names instead of file ids in the table

        Returns:
            `(results, group_list)`: the raw group records (with file ids) and a
            DataFrame for display. When there is nothing to show the DataFrame
            holds a single placeholder row filled with "-".
        """

        def placeholder_table():
            return pd.DataFrame.from_records(
                [
                    {
                        "id": "-",
                        "name": "-",
                        "files": "-",
                        "date_created": "-",
                    }
                ]
            )

        def quoted(file_name):
            # long names are cut and marked with ".."
            if len(file_name) > MAX_FILENAME_LENGTH:
                return f"'{file_name[:MAX_FILENAME_LENGTH]}..'"
            return f"'{file_name}'"

        # supply file_list to display the file names in the group
        file_id_to_name = {item["id"]: item["name"] for item in file_list or []}

        if user_id is None:
            # not signed in
            return [], placeholder_table()

        FileGroup = self._index._resources["FileGroup"]
        results = []
        with Session(engine) as session:
            query = select(FileGroup)
            if self._index.config.get("private", False):
                query = query.where(FileGroup.user == user_id)

            for row in session.execute(query).all():
                group = row[0]
                results.append(
                    {
                        "id": group.id,
                        "name": group.name,
                        "files": group.data.get("files", []),
                        "date_created": group.date_created.strftime(
                            "%Y-%m-%d %H:%M:%S"
                        ),
                    }
                )

        if not results:
            return results, placeholder_table()

        # format a copy so the returned raw records keep the file ids
        display_rows = deepcopy(results)
        for row in display_rows:
            file_names = [file_id_to_name.get(file_id, "-") for file_id in row["files"]]
            item_count = len(file_names)
            plural = "s" if item_count > 1 else ""
            listing = ", ".join(quoted(file_name) for file_name in file_names)
            row["files"] = f"[{item_count} item{plural}] " + listing

        return results, pd.DataFrame.from_records(display_rows)

    def set_group_id_selector(self, selected_group_id):
        """Select the files of a group in the chat file selector

        Args:
            selected_group_id: the id of the group to chat with

        Returns:
            A list of three values: the selector value (a one-item list holding
            the JSON-encoded file ids of the group), the selector mode
            "select", and a `gr.Tabs` update that selects "chat-tab".

        Raises:
            gr.Error: when no group with this id exists
        """
        FileGroup = self._index._resources["FileGroup"]

        # look up the selected group
        with Session(engine) as session:
            current_group = (
                session.query(FileGroup).filter_by(id=selected_group_id).first()
            )
            if current_group is None:
                # e.g. nothing selected, or the group was deleted meanwhile
                raise gr.Error("No group found")
            group_files = current_group.data["files"]

        file_ids = [json.dumps(group_files)]
        return [file_ids, "select", gr.Tabs(selected="chat-tab")]

    def save_group(self, group_id, group_name, group_files, user_id):
        """Create a new file group or update an existing one

        Args:
            group_id: the id of the group to update; falsy to create a new group
            group_name: the name of the group
            group_files: the ids of the files in the group
            user_id: the owner of a newly created group

        Returns:
            The id of the saved group.

        Raises:
            gr.Error: when the group to update does not exist, or when a new
                group would reuse a name this user already has
        """
        FileGroup = self._index._resources["FileGroup"]
        current_group = None

        with Session(engine) as session:
            if group_id:
                current_group = session.query(FileGroup).filter_by(id=group_id).first()
                if current_group is None:
                    # the group vanished (e.g. deleted from another session)
                    raise gr.Error("No group found")
                # update current group with new info
                current_group.name = group_name
                current_group.data["files"] = group_files  # Update the files
                session.commit()
            else:
                # check if group_name exist
                current_group = (
                    session.query(FileGroup)
                    .filter_by(
                        name=group_name,
                        user=user_id,
                    )
                    .first()
                )
                if current_group:
                    raise gr.Error(f"Group {group_name} already exists")

                current_group = FileGroup(
                    name=group_name,
                    data={"files": group_files},  # type: ignore
                    user=user_id,
                )
                session.add(current_group)
                session.commit()

            group_id = current_group.id

        gr.Info(f"Group {group_name} has been saved")
        return group_id

    def delete_group(self, group_id):
        """Delete the file group with the given id

        Raises:
            gr.Error: when `group_id` is empty or no such group exists

        Returns:
            Always None.
        """
        if not group_id:
            raise gr.Error("No group is selected")

        FileGroup = self._index._resources["FileGroup"]
        with Session(engine) as session:
            lookup = select(FileGroup).where(FileGroup.id == group_id)
            row = session.execute(lookup).first()
            if not row:
                raise gr.Error("No group found")

            target = row[0]
            # keep the name for the notification shown after the delete
            group_name = target.name
            session.delete(target)
            session.commit()
            gr.Info(f"Group {group_name} has been deleted")

        return None

    def interact_file_list(self, list_files, ev: gr.SelectData):
        """Handle a selection in the file table

        Returns:
            `(file_id, panel_text)`. The id is None and the text is
            `selected_panel_false` when the placeholder row was clicked or when
            the event is a deselection; otherwise the id of the selected row
            and `selected_panel_true` formatted with its file name.
        """
        # the "-" cell of the first row is the "no files" placeholder
        if ev.value == "-" and ev.index[0] == 0:
            gr.Info("No file is uploaded")
            return None, self.selected_panel_false

        if not ev.selected:
            return None, self.selected_panel_false

        row = ev.index[0]
        file_id = list_files["id"][row]
        panel_text = self.selected_panel_true.format(name=list_files["name"][row])
        return file_id, panel_text

    def interact_group_list(self, list_groups, ev: gr.SelectData):
        """Handle a selection in the group table

        Returns:
            A 4-tuple: the panel title, the id of the selected group, its name
            and its file list.

        Raises:
            gr.Error: when the first row is clicked and the cell is empty or
                the "-" placeholder
        """
        row = ev.index[0]
        is_placeholder_cell = not ev.value or ev.value == "-"
        if is_placeholder_cell and row == 0:
            raise gr.Error("No group is selected")

        group = list_groups[row]
        return (
            "### Group Information",
            group["id"],
            group["name"],
            group["files"],
        )

    def validate_files(self, files: list[str]):
        """Check the files against the limits configured on the index

        Two optional limits are read from the index config: `max_file_size`
        (compared as `max_file_size * 1e6` bytes) and `max_number_of_files`
        (stored sources plus the new files). A limit that is missing or 0 is
        not enforced.

        Returns:
            A list of error messages; empty when every check passes.
        """
        config = self._index.config
        paths = [Path(file) for file in files]
        errors = []

        max_file_size = config.get("max_file_size", 0)
        if max_file_size:
            oversized = [
                path.name for path in paths if path.stat().st_size > max_file_size * 1e6
            ]
            if oversized:
                # keep the message short when many files are too large
                listing = ", ".join(oversized)
                if len(listing) > 60:
                    listing = listing[:55] + "..."
                errors.append(
                    f"Maximum file size ({max_file_size} MB) exceeded: {listing}"
                )

        max_number_of_files = config.get("max_number_of_files", 0)
        if max_number_of_files:
            with Session(engine) as session:
                stored_count = session.query(
                    self._index._resources["Source"].id
                ).count()
            if len(paths) + stored_count > max_number_of_files:
                errors.append(
                    f"Maximum number of files ({max_number_of_files}) will be exceeded"
                )

        return errors

    def validate_urls(self, urls: list[str]):
        """Validate if the urls are valid

        A URL is accepted when it starts with the `http://` or `https://`
        scheme (compared case-insensitively). A bare "http" prefix without the
        scheme separator, e.g. "httpfoo", is rejected.

        Returns:
            One "Invalid url" message per rejected entry, in input order.
        """
        return [
            f"Invalid url `{url}`"
            for url in urls
            if not url.lower().startswith(("http://", "https://"))
        ]


class FileSelector(BasePage):
    """File selector UI in the Chat page

    It holds a search mode radio, a multi-select dropdown of files and groups,
    and the id of the user the selection belongs to.
    """

    def __init__(self, app, index):
        super().__init__(app)
        self._index = index
        self.on_building_ui()

    def default(self):
        """Return the initial `(mode, selected files, user id)` values"""
        user_id = -1 if self._app.f_user_management else 1
        return "disabled", [], user_id

    def on_building_ui(self):
        """Create the components, all starting from the values of `default()`"""
        initial_mode, initial_selection, initial_user_id = self.default()

        self.mode = gr.Radio(
            value=initial_mode,
            choices=[
                ("Search All", "all"),
                ("Search In File(s)", "select"),
            ],
            container=False,
        )
        # hidden until the mode is switched to "select"
        self.selector = gr.Dropdown(
            label="Files",
            value=initial_selection,
            choices=[],
            multiselect=True,
            container=False,
            interactive=True,
            visible=False,
        )
        self.selector_user_id = gr.State(value=initial_user_id)
        self.selector_choices = gr.JSON(
            value=[],
            visible=False,
        )

    def on_register_events(self):
        """Show the dropdown only in "select" mode and track the user id"""
        self.mode.change(
            fn=lambda mode, user_id: (gr.update(visible=mode == "select"), user_id),
            inputs=[self.mode, self._app.user_id],
            outputs=[self.selector, self.selector_user_id],
        )

        # attach special event for the first index
        if self._index.id != 1:
            return
        self.selector_choices.change(
            fn=None,
            inputs=[self.selector_choices],
            js=update_file_list_js,
            show_progress="hidden",
        )

    def as_gradio_component(self):
        """Return the components whose values are read by `get_selected_ids`"""
        return [self.mode, self.selector, self.selector_user_id]

    def get_selected_ids(self, components):
        """Resolve the component values into the list of selected file ids

        Args:
            components: the values of `as_gradio_component()`, in that order

        Returns:
            An empty list when there is no user or the mode is "disabled"; the
            dropdown value as-is in "select" mode; otherwise the id of every
            source (restricted to the user's own for a private index).
        """
        mode, selected, user_id = components[0], components[1], components[2]
        if user_id is None or mode == "disabled":
            return []
        if mode == "select":
            return selected

        Source = self._index._resources["Source"]
        with Session(engine) as session:
            query = select(Source.id)
            if self._index.config.get("private", False):
                query = query.where(Source.user == user_id)
            rows = session.execute(query).all()

        return [row[0] for row in rows]

    def load_files(self, selected_files, user_id):
        """Reload the dropdown choices from the stored files and groups

        Args:
            selected_files: the current dropdown value
            user_id: the current user; None means not signed in

        Returns:
            `(dropdown update, choices)`. Each file contributes a
            `(name, id)` choice and each group a choice whose value is the
            JSON-encoded list of its file ids. Selected values that are not ids
            of the loaded files are removed from the selection.
        """
        options: list = []
        if user_id is None:
            # not signed in
            return gr.update(value=selected_files, choices=options), options

        is_private = self._index.config.get("private", False)
        available_ids = set()
        with Session(engine) as session:
            # get file list from Source table
            Source = self._index._resources["Source"]
            file_query = select(Source)
            if is_private:
                file_query = file_query.where(Source.user == user_id)
            if KH_DEMO_MODE:
                # limit query by MAX_FILE_COUNT
                file_query = file_query.limit(MAX_FILE_COUNT)

            for row in session.execute(file_query).all():
                source = row[0]
                available_ids.add(source.id)
                options.append((source.name, source.id))

            # get group list from FileGroup table
            FileGroup = self._index._resources["FileGroup"]
            group_query = select(FileGroup)
            if is_private:
                group_query = group_query.where(FileGroup.user == user_id)

            for row in session.execute(group_query).all():
                group = row[0]
                group_value = json.dumps(group.data.get("files", []))
                options.append((f"group: '{group.name}'", group_value))

        if selected_files:
            selected_files = [
                each for each in selected_files if each in available_ids
            ]

        return gr.update(value=selected_files, choices=options), options

    def _on_app_created(self):
        """Fill the dropdown when the app is loaded"""
        self._app.app.load(
            self.load_files,
            inputs=[self.selector, self._app.user_id],
            outputs=[self.selector, self.selector_choices],
        )

    def on_subscribe_public_events(self):
        """Reload the choices when the index changes or the user changes"""
        event_names = [f"onFileIndex{self._index.id}Changed"]
        if self._app.f_user_management:
            event_names += ["onSignIn", "onSignOut"]

        for event_name in event_names:
            # build a fresh definition per event so no dict is shared
            self._app.subscribe_event(
                name=event_name,
                definition={
                    "fn": self.load_files,
                    "inputs": [self.selector, self._app.user_id],
                    "outputs": [self.selector, self.selector_choices],
                    "show_progress": "hidden",
                },
            )


================================================
FILE: libs/ktem/ktem/index/file/utils.py
================================================
import os

import requests

# URL prefixes of arXiv abstract and PDF pages. They are plain string
# prefixes matched with str.startswith (see is_arxiv_url), not regexes.
ARXIV_URL_PATTERNS = [
    "https://arxiv.org/abs/",
    "https://arxiv.org/pdf/",
]

# characters that clean_name replaces with "_"
ILLEGAL_NAME_CHARS = ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]


def clean_name(name):
    """Return `name` with every character of ILLEGAL_NAME_CHARS turned into "_"."""
    # every entry is a single character, so one translation pass is enough
    replacements = str.maketrans({char: "_" for char in ILLEGAL_NAME_CHARS})
    return name.translate(replacements)


def is_arxiv_url(url):
    """Tell whether `url` starts with one of the prefixes in ARXIV_URL_PATTERNS."""
    # str.startswith accepts a tuple and checks every prefix in one call
    return url.startswith(tuple(ARXIV_URL_PATTERNS))


# download PDF from Arxiv URL
def download_arxiv_pdf(url, output_path):
    """Download an arXiv paper as `<paper title>.pdf` into `output_path`

    The paper title is read from the abstract page and cleaned with
    `clean_name` to build the file name. The PDF is not downloaded again when a
    file with that name already exists.

    Args:
        url: an arXiv abstract (`/abs/`) or PDF (`/pdf/`) URL
        output_path: the directory to store the PDF in

    Returns:
        The path of the PDF file.

    Raises:
        ValueError: when `url` is not an arXiv URL or the title cannot be found
        requests.HTTPError: when the abstract page or the PDF cannot be fetched
    """
    if not is_arxiv_url(url):
        raise ValueError("Invalid Arxiv URL")

    abs_prefix = "https://arxiv.org/abs/"
    pdf_prefix = "https://arxiv.org/pdf/"

    # Only swap the leading path segment. Replacing every "abs"/"pdf" in the
    # URL would also rewrite the paper id or a trailing ".pdf" extension.
    if url.startswith(abs_prefix):
        abstract_url = url
        pdf_url = pdf_prefix + url[len(abs_prefix) :]
    else:
        pdf_url = url
        paper_id = url[len(pdf_prefix) :]
        if paper_id.endswith(".pdf"):
            paper_id = paper_id[: -len(".pdf")]
        abstract_url = abs_prefix + paper_id

    # get paper name from abstract url
    response = requests.get(abstract_url)
    response.raise_for_status()

    # parse HTML response and get h1.title
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(response.content, "html.parser")
    title_tag = soup.find("h1", class_="title")
    if title_tag is None:
        raise ValueError("Failed to get paper name")

    name = clean_name(title_tag.text.strip().replace("Title:", ""))
    if not name:
        raise ValueError("Failed to get paper name")

    output_file_path = os.path.join(output_path, name + ".pdf")
    # prevent downloading if file already exists
    if not os.path.exists(output_file_path):
        response = requests.get(pdf_url)
        # never store an error page as the PDF: the file would be reused by
        # every later call because of the existence check above
        response.raise_for_status()

        with open(output_file_path, "wb") as f:
            f.write(response.content)

    return output_file_path


================================================
FILE: libs/ktem/ktem/index/manager.py
================================================
from typing import Optional, Type

from ktem.db.models import engine
from sqlmodel import Session, select
from theflow.settings import settings
from theflow.utils.modules import import_dotted_string

from .base import BaseIndex
from .models import Index


class IndexManager:
    """Manage the application indices

    The index manager is responsible for:
        - Managing the range of possible indices and their extensions
        - Each actual index built by user

    Attributes:
        - indices: list of indices built by user
    """

    def __init__(self, app):
        self._app = app
        # indices started in this process, see `start_index`
        self._indices = []
        # dotted class path -> index class, filled by `load_index_types`
        self._index_types: dict[str, Type[BaseIndex]] = {}

    @property
    def index_types(self) -> dict:
        """Mapping from dotted class path to the supported index classes"""
        return self._index_types

    def build_index(self, name: str, config: dict, index_type: str):
        """Build the index

        Building the index simply means recording the index information into the
        database and returning the index object. The index is not added to
        `indices`; that is done by `start_index`.

        Args:
            name (str): the name of the index
            config (dict): the config of the index
            index_type (str): the type of the index, as a dotted path to the
                index class

        Returns:
            BaseIndex: the index object

        Raises:
            ValueError: if the index class cannot be imported, constructed or
                created. The database record is removed in that case.
        """

        with Session(engine) as sess:
            entry = Index(name=name, config=config, index_type=index_type)
            sess.add(entry)
            sess.commit()
            sess.refresh(entry)

            try:
                # build the index
                index_cls = import_dotted_string(index_type, safe=False)
                index = index_cls(app=self._app, id=entry.id, name=name, config=config)
                index.on_create()

                # update the entry
                entry.config = index.config
                sess.commit()
            except Exception as e:
                # reset the session first: it cannot be reused if the failure
                # came from the commit above
                sess.rollback()
                sess.delete(entry)
                sess.commit()
                raise ValueError(f'Cannot create index "{name}": {e}') from e

        return index

    def update_index(self, id: int, name: str, config: dict):
        """Update the index information

        The change is saved in the database and mirrored on the started index
        with the same id, if there is one.

        Args:
            id: the id of the index
            name: the new name of the index
            config: the new config of the index

        Raises:
            ValueError: if no index with this id is recorded in the database
        """
        with Session(engine) as sess:
            entry = sess.get(Index, id)
            if entry is None:
                raise ValueError(f"Index with id {id} does not exist")

            entry.name = name
            entry.config = config
            sess.commit()

        for index in self._indices:
            if index.id == id:
                index.name = name
                index.config = config
                break

    def start_index(self, id: int, name: str, config: dict, index_type: str):
        """Start the index

        Args:
            id (int): the id of the index
            name (str): the name of the index
            config (dict): the config of the index
            index_type (str): the type of the index

        Returns:
            BaseIndex: the started index, which is also appended to `indices`
        """
        index_cls = import_dotted_string(index_type, safe=False)
        index = index_cls(app=self._app, id=id, name=name, config=config)
        index.on_start()

        self._indices.append(index)
        return index

    def delete_index(self, id: int):
        """Delete the index from the database

        Args:
            id (int): the id of a started index

        Raises:
            ValueError: if no started index has this id, or if removing the
                database record fails
        """
        index: Optional[BaseIndex] = None
        for _ in self._indices:
            if _.id == id:
                index = _
                break

        if index is None:
            raise ValueError(
                "Index does not exist. If you have already removed the index, "
                "please restart to reflect the changes."
            )

        try:
            try:
                # clean up
                index.on_delete()
            except Exception as e:
                # best effort: a failed clean-up must not block the removal
                print(f"Error while deleting index {index.name}: {e}")

            # remove from database
            with Session(engine) as sess:
                item = sess.get(Index, id)
                # the record may already be gone, then there is nothing to delete
                if item is not None:
                    sess.delete(item)
                    sess.commit()

            new_indices = [_ for _ in self._indices if _.id != id]
            self._indices = new_indices
        except Exception as e:
            raise ValueError(f"Cannot delete index {index.name}: {e}") from e

    def load_index_types(self):
        """Load the supported index types"""
        self._index_types = {}

        # built-in index types
        from .file.index import FileIndex

        for index in [FileIndex]:
            self._index_types[f"{index.__module__}.{index.__qualname__}"] = index

        # developer-defined custom index types
        for index_str in settings.KH_INDEX_TYPES:
            cls: Type[BaseIndex] = import_dotted_string(index_str, safe=False)
            self._index_types[f"{cls.__module__}.{cls.__qualname__}"] = cls

    def exists(self, id: Optional[int] = None, name: Optional[str] = None) -> bool:
        """Check if the index exists in the database

        Args:
            id (int, optional): the id of the index. When given, only the id is
                checked and `name` is ignored.
            name (str, optional): the name of the index, used when `id` is None

        Returns:
            bool: True if the index exists, False otherwise
        """
        # compare with None so that an id of 0 is still looked up
        if id is not None:
            with Session(engine) as sess:
                index = sess.get(Index, id)
                return index is not None

        if name:
            with Session(engine) as sess:
                index = sess.exec(select(Index).where(Index.name == name)).one_or_none()
                return index is not None

        return False

    def on_application_startup(self):
        """This method is called by the base application when the application starts

        Load the index from database
        """
        self.load_index_types()

        # record the indices declared in the settings that are not stored yet
        for index in settings.KH_INDICES:
            if not self.exists(name=index["name"]):
                self.build_index(**index)

        # start every index recorded in the database
        with Session(engine) as sess:
            index_defs = sess.exec(select(Index))
            for index_def in index_defs:
                self.start_index(**index_def.model_dump())

    @property
    def indices(self):
        """List of the started indices"""
        return self._indices

    def info(self):
        """Mapping from index id to started index"""
        return {index.id: index for index in self._indices}


================================================
FILE: libs/ktem/ktem/index/models.py
================================================
from typing import Optional

from ktem.db.engine import engine
from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel


# TODO: simplify with using SQLAlchemy directly
class Index(SQLModel, table=True):
    """Database record of an index, stored in the `ktem__index` table"""

    __table_args__ = {"extend_existing": True}
    __tablename__ = "ktem__index"  # type: ignore

    # primary key; defaults to None when the record is created without an id
    id: Optional[int] = Field(default=None, primary_key=True)
    # name of the index, declared unique
    name: str = Field(unique=True)
    # type of the index as a string
    index_type: str = Field()
    # config of the index, stored in a JSON column
    config: dict = Field(default={}, sa_column=Column(JSON))


# Runs when this module is imported: create the tables registered on the
# model metadata.
# NOTE(review): not gated on a KH_ENABLE_ALEMBIC setting the way the table
# creation in ktem/llms/db.py is - confirm this is intended.
Index.metadata.create_all(engine)


================================================
FILE: libs/ktem/ktem/index/ui.py
================================================
import gradio as gr
import pandas as pd
import yaml
from ktem.app import BasePage
from ktem.utils.file import YAMLNoDateSafeLoader

from .manager import IndexManager


# UGLY way to restart gradio server by updating atime
def update_current_module_atime():
    """Stamp this module's file with the current time (access and modified)."""
    import os
    import time

    module_path = __file__
    print("Updating atime for", module_path)

    # the same timestamp is written as both atime and mtime
    now = time.time()
    os.utime(module_path, (now, now))


def format_description(cls):
    """Render the docstring of `cls` followed by a table of its admin settings.

    Each entry of `cls.get_admin_settings()` becomes one markdown table row with
    its key, its "value" (shown as default) and its "info"; missing fields are
    left blank.
    """
    table = ["| Name | Default | Description |", "| --- | --- | --- |"]
    table += [
        f"| {key} | {value.get('value', '')} | {value.get('info', '')} |"
        for key, value in cls.get_admin_settings().items()
    ]
    return f"{cls.__doc__}\n\n" + "\n".join(table)


class IndexManagement(BasePage):
    """Page to view, add, edit and delete the indices of the application"""

    def __init__(self, app):
        self._app = app
        self.manager: IndexManager = app.index_manager
        self.spec_desc_default = (
            "# Spec description\n\nSelect an index to view the spec description."
        )
        self.on_building_ui()

    def on_building_ui(self):
        """Build the "View" tab (list and edit panel) and the "Add" tab"""
        with gr.Tab(label="View"):
            self.index_list = gr.DataFrame(
                headers=["id", "name", "index type"],
                interactive=False,
                column_widths=[10, 30, 60],
            )

            # edit panel, hidden until an index is selected
            with gr.Column(visible=False) as self._selected_panel:
                # id of the selected index, -1 when nothing is selected
                self.selected_index_id = gr.Number(value=-1, visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_name = gr.Textbox(
                            label="Index name",
                        )
                        self.edit_spec = gr.Textbox(
                            label="Index config",
                            info="Admin configuration of the Index in YAML format",
                            lines=10,
                        )

                        gr.Markdown(
                            "IMPORTANT: Changing or deleting the index will require "
                            "restarting the system. Some config settings will require "
                            "rebuilding the index for the index to work properly."
                        )
                        with gr.Row():
                            self.btn_edit_save = gr.Button(
                                "Save", min_width=10, variant="primary"
                            )
                            self.btn_delete = gr.Button(
                                "Delete", min_width=10, variant="stop"
                            )
                            with gr.Row(visible=False) as self._delete_confirm:
                                self.btn_delete_yes = gr.Button(
                                    "Confirm Delete",
                                    variant="stop",
                                    min_width=10,
                                )
                                self.btn_delete_no = gr.Button("Cancel", min_width=10)
                            self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_spec_desc = gr.Markdown("# Spec description")

        with gr.Tab(label="Add"):
            with gr.Row():
                with gr.Column(scale=2):
                    self.name = gr.Textbox(
                        label="Index name",
                        info="Must be unique and non-empty.",
                    )
                    self.index_type = gr.Dropdown(label="Index type")
                    self.spec = gr.Textbox(
                        label="Specification",
                        info="Specification of the index in YAML format.",
                    )
                    gr.Markdown(
                        "<mark>Note</mark>: "
                        "After creating index, please restart the app"
                    )
                    self.btn_new = gr.Button("Add", variant="primary")

                with gr.Column(scale=3):
                    self.spec_desc = gr.Markdown(self.spec_desc_default)

    def _on_app_created(self):
        """Called when the app is created"""
        # fill the list of indices on page load
        self._app.app.load(
            self.list_indices,
            inputs=[],
            outputs=[self.index_list],
        )
        # fill the index type choices, labelled with the class name only
        self._app.app.load(
            lambda: gr.update(
                choices=[
                    (key.split(".")[-1], key) for key in self.manager.index_types.keys()
                ]
            ),
            outputs=[self.index_type],
        )

    def on_register_events(self):
        """Wire the components of the page to their handlers"""
        self.index_type.select(
            self.on_index_type_change,
            inputs=[self.index_type],
            outputs=[self.spec, self.spec_desc],
        )
        # create, then refresh the list, clear the form and touch this module
        self.btn_new.click(
            self.create_index,
            inputs=[self.name, self.index_type, self.spec],
            outputs=None,
        ).success(self.list_indices, inputs=[], outputs=[self.index_list]).success(
            lambda: ("", None, "", self.spec_desc_default),
            outputs=[
                self.name,
                self.index_type,
                self.spec,
                self.spec_desc,
            ],
        ).success(
            update_current_module_atime
        )
        self.index_list.select(
            self.select_index,
            inputs=self.index_list,
            outputs=[self.selected_index_id],
            show_progress="hidden",
        )

        self.selected_index_id.change(
            self.on_selected_index_change,
            inputs=[self.selected_index_id],
            outputs=[
                self._selected_panel,
                # edit section
                self.edit_spec,
                self.edit_spec_desc,
                self.edit_name,
            ],
            show_progress="hidden",
        )
        # "Delete" swaps the action buttons for the confirmation row
        self.btn_delete.click(
            lambda: (
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(visible=True),
            ),
            inputs=[],
            outputs=[
                self.btn_edit_save,
                self.btn_delete,
                self.btn_close,
                self._delete_confirm,
            ],
            show_progress="hidden",
        )
        self.btn_delete_yes.click(
            self.delete_index,
            inputs=[self.selected_index_id],
            outputs=[self.selected_index_id],
            show_progress="hidden",
        ).then(self.list_indices, inputs=[], outputs=[self.index_list],).success(
            update_current_module_atime
        )
        # "Cancel" restores the action buttons and hides the confirmation row
        self.btn_delete_no.click(
            lambda: (
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(visible=True),
                gr.update(visible=False),
            ),
            inputs=[],
            outputs=[
                self.btn_edit_save,
                self.btn_delete,
                self.btn_close,
                self._delete_confirm,
            ],
            show_progress="hidden",
        )
        self.btn_edit_save.click(
            self.update_index,
            inputs=[
                self.selected_index_id,
                self.edit_name,
                self.edit_spec,
            ],
            show_progress="hidden",
        ).then(
            self.list_indices,
            inputs=[],
            outputs=[self.index_list],
        )
        # "Close" deselects the index, which hides the edit panel
        self.btn_close.click(
            lambda: -1,
            outputs=[self.selected_index_id],
        )

    @staticmethod
    def _parse_spec(config: str) -> dict:
        """Parse the YAML text of an index config

        Args:
            config: the config of the index in YAML format

        Returns:
            The parsed config. Empty text gives an empty dict.

        Raises:
            ValueError: if the text does not describe a YAML mapping
        """
        spec = yaml.load(config, Loader=YAMLNoDateSafeLoader)
        if spec is None:
            # empty text is parsed as None by YAML
            return {}
        if not isinstance(spec, dict):
            raise ValueError("Index config must be a YAML mapping")
        return spec

    def on_index_type_change(self, index_type: str):
        """Update the spec description and pre-fill the default values

        Args:
            index_type: the name of the index type, this is usually the class name

        Returns:
            A tuple of the default spec and the description
        """
        index_type_cls = self.manager.index_types[index_type]
        required: dict = {
            key: value.get("value", None)
            for key, value in index_type_cls.get_admin_settings().items()
        }

        return yaml.dump(required, sort_keys=False), format_description(index_type_cls)

    def create_index(self, name: str, index_type: str, config: str):
        """Create the index

        Args:
            name: the name of the index, must be non-empty and unique
            index_type: the key of the index type in the manager's index types
            config: the config of the index in YAML format

        Raises:
            gr.Error: if the input is invalid or the index cannot be built
        """
        name = name.strip()
        if not name:
            raise gr.Error("Name must not be empty")

        if not index_type:
            raise gr.Error("Index type must be selected")

        # indices built since the last restart are recorded in the database but
        # are not part of manager.indices, so both are checked
        existing_names = {idx.name for idx in self.manager.indices}
        if name in existing_names or self.manager.exists(name=name):
            raise gr.Error(f"Index '{name}' already exists. Please use a unique name.")

        try:
            self.manager.build_index(
                name=name,
                config=self._parse_spec(config),
                index_type=index_type,
            )
            gr.Info(f'Index "{name}" created successfully. Please restart the app!')
        except Exception as e:
            raise gr.Error(f'Failed to create index "{name}": {e}')

    def list_indices(self):
        """List the indices constructed by the user

        Returns:
            A DataFrame with the columns "id", "name" and "index type". When
            there is no index, it holds a single row of "-" placeholders.
        """
        items = []
        for item in self.manager.indices:
            record = {}
            record["id"] = item.id
            record["name"] = item.name
            record["index type"] = item.__class__.__name__
            items.append(record)

        if items:
            indices_list = pd.DataFrame.from_records(items)
        else:
            indices_list = pd.DataFrame.from_records(
                [{"id": "-", "name": "-", "index type": "-"}]
            )

        return indices_list

    def select_index(self, index_list, ev: gr.SelectData) -> int:
        """Return the index id

        Returns -1 when the placeholder row is clicked or the cell is deselected.
        """
        # the "-" placeholder row shown by `list_indices` when there is no index
        if ev.value == "-" and ev.index[0] == 0:
            gr.Info("No index is constructed. Please create one first!")
            return -1

        if not ev.selected:
            return -1

        return int(index_list["id"][ev.index[0]])

    def on_selected_index_change(self, selected_index_id: int):
        """Show the relevant index as user selects it on the UI

        Args:
            selected_index_id: the id of the selected index

        Returns:
            A tuple of updates for the edit panel, the config text, the spec
            description and the index name. The panel is hidden and the fields
            are cleared when the id is -1 or matches no started index.
        """
        index = None
        if selected_index_id != -1:
            # an id without a started index is handled like no selection
            index = self.manager.info().get(selected_index_id)

        if index is None:
            _selected_panel = gr.update(visible=False)
            edit_spec = gr.update(value="")
            edit_spec_desc = gr.update(value="")
            edit_name = gr.update(value="")
        else:
            _selected_panel = gr.update(visible=True)
            # keep the key order of the config, as in `on_index_type_change`
            edit_spec = yaml.dump(index.config, sort_keys=False)
            edit_spec_desc = format_description(index.__class__)
            edit_name = index.name

        return (
            _selected_panel,
            edit_spec,
            edit_spec_desc,
            edit_name,
        )

    def update_index(self, selected_index_id: int, name: str, config: str):
        """Save the new name and config of the selected index

        Args:
            selected_index_id: the id of the index to update
            name: the new name, must be non-empty and unique
            config: the new config of the index in YAML format

        Raises:
            gr.Error: if the input is invalid or the update fails
        """
        name = name.strip()
        if not name:
            raise gr.Error("Name must not be empty")

        # Check uniqueness (excluding current index)
        for idx in self.manager.indices:
            if idx.name == name and idx.id != selected_index_id:
                raise gr.Error(
                    f"Index '{name}' already exists. Please use a unique name."
                )

        try:
            spec = self._parse_spec(config)
            self.manager.update_index(selected_index_id, name, spec)
            gr.Info(f'Index "{name}" updated successfully. Please restart the app!')
        except gr.Error:
            raise
        except Exception as e:
            raise gr.Error(f'Failed to save index "{name}": {e}')

    def delete_index(self, selected_index_id):
        """Delete the selected index

        Returns:
            -1 when the index is deleted, so that the selection is cleared;
            the unchanged id when the deletion fails.
        """
        try:
            self.manager.delete_index(selected_index_id)
            gr.Info("Delete index successfully. Please restart the app!")
        except Exception as e:
            gr.Warning(f"Fail to delete index: {e}")
            return selected_index_id

        return -1


================================================
FILE: libs/ktem/ktem/llms/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/llms/db.py
================================================
from typing import Type

from ktem.db.engine import engine
from sqlalchemy import JSON, Boolean, Column, String
from sqlalchemy.orm import DeclarativeBase
from theflow.settings import settings as flowsettings
from theflow.utils.modules import import_dotted_string


class Base(DeclarativeBase):
    """Declarative base of the tables declared in this module"""


class BaseLLMTable(Base):
    """Base table to store language model

    Abstract: it maps to no table itself, concrete tables subclass it.
    """

    __abstract__ = True

    # name of the model, used as primary key
    name = Column(String, primary_key=True, unique=True)
    # specification of the model, stored as JSON
    spec = Column(JSON, default={})
    # flag marking the model as the default one
    default = Column(Boolean, default=False)


# Base class of the LLM table: the class named by the KH_TABLE_LLM setting when
# that setting is defined, BaseLLMTable otherwise.
_base_llm: Type[BaseLLMTable]
if hasattr(flowsettings, "KH_TABLE_LLM"):
    _base_llm = import_dotted_string(flowsettings.KH_TABLE_LLM, safe=False)
else:
    _base_llm = BaseLLMTable


class LLMTable(_base_llm):  # type: ignore
    """Concrete table of language models, stored as `llm_table`"""

    __tablename__ = "llm_table"


# Create the tables when this module is imported, unless the KH_ENABLE_ALEMBIC
# setting is defined and truthy.
if not getattr(flowsettings, "KH_ENABLE_ALEMBIC", False):
    LLMTable.metadata.create_all(engine)


================================================
FILE: libs/ktem/ktem/llms/manager.py
================================================
from typing import Optional, Type, overload

from sqlalchemy import select
from sqlalchemy.orm import Session
from theflow.settings import settings as flowsettings
from theflow.utils.modules import deserialize, import_dotted_string

from kotaemon.llms import ChatLLM

from .db import LLMTable, engine


class LLMManager:
    """Represent a pool of models

    The models are stored in the database (`LLMTable`) and mirrored in memory.
    Every change made through `add`, `update` or `delete` reloads the pool.
    """

    def __init__(self):
        self._models: dict[str, ChatLLM] = {}
        self._info: dict[str, dict] = {}
        self._default: str = ""
        self._vendors: list[Type] = []

        # store the models declared in the settings that are not in the
        # database yet; the stored ones are left untouched
        if hasattr(flowsettings, "KH_LLMS"):
            for name, model in flowsettings.KH_LLMS.items():
                with Session(engine) as session:
                    stmt = select(LLMTable).where(LLMTable.name == name)
                    result = session.execute(stmt)
                    if not result.first():
                        item = LLMTable(
                            name=name,
                            spec=model["spec"],
                            default=model.get("default", False),
                        )
                        session.add(item)
                        session.commit()

        self.load()
        self.load_vendors()

    def load(self):
        """Load the model pool from database

        The current pool is replaced only after every stored model has been
        built, so a failure leaves the previous pool in place.
        """
        models: dict[str, ChatLLM] = {}
        info: dict[str, dict] = {}
        default = ""
        with Session(engine) as session:
            stmt = select(LLMTable)
            items = session.execute(stmt)

            for (item,) in items:
                models[item.name] = deserialize(item.spec, safe=False)
                info[item.name] = {
                    "name": item.name,
                    "spec": item.spec,
                    "default": item.default,
                }
                if item.default:
                    default = item.name

        self._models, self._info, self._default = models, info, default

    def load_vendors(self):
        """Load the supported model classes, plus KH_LLM_EXTRA_VENDORS if set"""
        from kotaemon.llms import (
            AzureChatOpenAI,
            ChatOpenAI,
            LCAnthropicChat,
            LCCohereChat,
            LCGeminiChat,
            LCOllamaChat,
            LlamaCppChat,
        )

        self._vendors = [
            ChatOpenAI,
            AzureChatOpenAI,
            LCAnthropicChat,
            LCGeminiChat,
            LCCohereChat,
            LCOllamaChat,
            LlamaCppChat,
        ]

        for extra_vendor in getattr(flowsettings, "KH_LLM_EXTRA_VENDORS", []):
            self._vendors.append(import_dotted_string(extra_vendor, safe=False))

    def __getitem__(self, key: str) -> ChatLLM:
        """Get model by name"""
        return self._models[key]

    def __contains__(self, key: str) -> bool:
        """Check if model exists"""
        return key in self._models

    # `default` is optional in the first overload so that `get(key)` type-checks
    @overload
    def get(self, key: str, default: None = None) -> Optional[ChatLLM]:
        ...

    @overload
    def get(self, key: str, default: ChatLLM) -> ChatLLM:
        ...

    def get(self, key: str, default: Optional[ChatLLM] = None) -> Optional[ChatLLM]:
        """Get model by name with default value"""
        return self._models.get(key, default)

    def settings(self) -> dict:
        """Present model pools option for gradio"""
        return {
            "label": "LLM",
            "choices": list(self._models.keys()),
            "value": self.get_default_name(),
        }

    def options(self) -> dict:
        """Present a dict of models"""
        return self._models

    def get_random_name(self) -> str:
        """Get the name of random model

        Returns:
            str: random model name in the pool

        Raises:
            ValueError: if the pool is empty
        """
        import random

        if not self._models:
            raise ValueError("No models in pool")

        return random.choice(list(self._models.keys()))

    def get_default_name(self) -> str:
        """Get the name of default model

        In case there is no default model, choose random model from pool. In
        case several models are flagged as default, the last one loaded from
        the database is used.

        Returns:
            str: model name

        Raises:
            ValueError: if the pool is empty
        """
        if not self._models:
            raise ValueError("No models in pool")

        if not self._default:
            return self.get_random_name()

        return self._default

    def get_random(self) -> ChatLLM:
        """Get random model"""
        return self._models[self.get_random_name()]

    def get_default(self) -> ChatLLM:
        """Get default model

        In case there is no default model, choose random model from pool. In
        case several models are flagged as default, the last one loaded from
        the database is used.

        Returns:
            ChatLLM: model
        """
        return self._models[self.get_default_name()]

    def info(self) -> dict:
        """List all models"""
        return self._info

    def add(self, name: str, spec: dict, default: bool):
        """Add a new model to the pool

        Args:
            name: the name of the model, must be non-empty
            spec: the specification of the model
            default: whether the model becomes the only default model

        Raises:
            ValueError: if the name is empty or the model cannot be stored
        """
        if not name:
            raise ValueError("Name must not be empty")

        try:
            with Session(engine) as session:

                if default:
                    # turn all models to non-default
                    session.query(LLMTable).update({"default": False})

                item = LLMTable(name=name, spec=spec, default=default)
                session.add(item)
                # single commit: the default flags are only reset if the new
                # model is stored as well
                session.commit()
        except Exception as e:
            raise ValueError(f"Failed to add model {name}: {e}") from e

        self.load()

    def delete(self, name: str):
        """Delete a model from the pool

        Raises:
            ValueError: if the model is not stored or cannot be deleted
        """
        try:
            with Session(engine) as session:
                item = session.query(LLMTable).filter_by(name=name).first()
                if item is None:
                    raise ValueError(f"Model {name} not found")
                session.delete(item)
                session.commit()
        except Exception as e:
            raise ValueError(f"Failed to delete model {name}: {e}") from e

        self.load()

    def update(self, name: str, spec: dict, default: bool, new_name: str = ""):
        """Update a model in the pool, optionally renaming it.

        Args:
            name: the current name of the model
            spec: the new specification of the model
            default: whether the model becomes the only default model
            new_name: the new name of the model; empty or equal to `name` keeps
                the current name

        Raises:
            ValueError: if the name is empty, the new name is already used, the
                model is not stored, or the change cannot be saved
        """
        if not name:
            raise ValueError("Name must not be empty")

        renaming = bool(new_name) and new_name != name
        if renaming and new_name in self._info:
            raise ValueError(f"Model '{new_name}' already exists. Use a unique name.")

        try:
            with Session(engine) as session:

                if default:
                    # turn all models to non-default
                    session.query(LLMTable).update({"default": False})

                item = session.query(LLMTable).filter_by(name=name).first()
                if not item:
                    # leaving the session without commit discards the reset of
                    # the default flags above
                    raise ValueError(f"Model {name} not found")
                if renaming:
                    # renamed in place, within the same transaction, so that a
                    # failure cannot lose the model
                    item.name = new_name
                item.spec = spec
                item.default = default
                session.commit()
        except Exception as e:
            raise ValueError(f"Failed to update model {name}: {e}") from e

        self.load()

    def vendors(self) -> dict:
        """Return the supported model classes, keyed by class name"""
        return {vendor.__qualname__: vendor for vendor in self._vendors}


# Pool instance created when this module is imported; constructing it reads the
# models from the database.
llms = LLMManager()


================================================
FILE: libs/ktem/ktem/llms/ui.py
================================================
from copy import deepcopy

import gradio as gr
import pandas as pd
import yaml
from ktem.app import BasePage
from ktem.utils.file import YAMLNoDateSafeLoader
from theflow.utils.modules import deserialize

from .manager import llms


def format_description(cls):
    """Render the docstring of `cls` followed by a table of its parameters.

    The parameters come from `cls.describe()["params"]`. Each one becomes a
    markdown table row with its name, "type" and "help", except those whose
    "auto_callback" is a string, which are left out.
    """
    table = ["| Name | Type | Description |", "| --- | --- | --- |"]
    table += [
        f"| {key} | {value['type']} | {value['help']} |"
        for key, value in cls.describe()["params"].items()
        if not isinstance(value["auto_callback"], str)
    ]
    return f"{cls.__doc__}\n\n" + "\n".join(table)


class LLMManagement(BasePage):
    def __init__(self, app):
        """Keep a reference to the app and build this page's components."""
        # Placeholder Markdown for the "Add" tab's description pane.
        self.spec_desc_default = "\n\n".join(
            ["# Spec description", "Select an LLM to view the spec description."]
        )
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the Gradio components of the LLM management page.

        Two tabs are built:
        - "View": a read-only table of LLMs plus an initially hidden detail
          panel used to edit, test, delete or close the selected LLM.
        - "Add": a form to register a new LLM next to a Markdown pane that
          shows the selected vendor's spec description.

        Components are stored as attributes so that event handlers can be
        attached to them in ``on_register_events``.
        """
        with gr.Tab(label="View"):
            self.llm_list = gr.DataFrame(
                headers=["name", "vendor", "default"],
                interactive=False,
                column_widths=[30, 40, 30],
            )

            # Detail panel for the selected LLM; hidden until a row is selected.
            with gr.Column(visible=False) as self._selected_panel:
                # Invisible textbox holding the name of the selected LLM.
                self.selected_llm_name = gr.Textbox(value="", visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_default = gr.Checkbox(
                            label="Set default",
                            info=(
                                "Set this LLM as default. If no default is set, "
                                "a random LLM will be used. "
                                "This default LLM will be used by other components "
                                "by default if no LLM is specified for such components."
                            ),
                        )
                        self.edit_name = gr.Textbox(
                            label="Name",
                            info="Edit to rename this LLM.",
                        )
                        self.edit_spec = gr.Textbox(
                            label="Specification",
                            info="Specification of the LLM in YAML format",
                            lines=10,
                        )

                        # NOTE(review): this accordion is created with
                        # visible=False and nothing in this class makes it
                        # visible - confirm whether that is intended.
                        with gr.Accordion(
                            label="Test connection", visible=False, open=False
                        ) as self._check_connection_panel:
                            with gr.Row():
                                with gr.Column(scale=4):
                                    self.connection_logs = gr.HTML("Logs")

                                with gr.Column(scale=1):
                                    self.btn_test_connection = gr.Button(
                                        "Test",
                                    )

                        # Action buttons: Save / Delete (with confirmation) / Close.
                        with gr.Row(visible=False) as self._selected_panel_btn:
                            with gr.Column():
                                self.btn_edit_save = gr.Button(
                                    "Save", min_width=10, variant="primary"
                                )
                            with gr.Column():
                                self.btn_delete = gr.Button(
                                    "Delete", min_width=10, variant="stop"
                                )
                                # Confirmation buttons start hidden and replace
                                # "Delete" once it has been clicked.
                                with gr.Row():
                                    self.btn_delete_yes = gr.Button(
                                        "Confirm Delete",
                                        variant="stop",
                                        visible=False,
                                        min_width=10,
                                    )
                                    self.btn_delete_no = gr.Button(
                                        "Cancel", visible=False, min_width=10
                                    )
                            with gr.Column():
                                self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_spec_desc = gr.Markdown("# Spec description")

        with gr.Tab(label="Add"):
            with gr.Row():
                with gr.Column(scale=2):
                    self.name = gr.Textbox(
                        label="LLM name",
                        info=(
                            "Must be unique. The name will be used to identify the LLM."
                        ),
                    )
                    # Choices are filled in on page load (see _on_app_created).
                    self.llm_choices = gr.Dropdown(
                        label="LLM vendors",
                        info=(
                            "Choose the vendor for the LLM. Each vendor has different "
                            "specification."
                        ),
                    )
                    self.spec = gr.Textbox(
                        label="Specification",
                        info="Specification of the LLM in YAML format",
                    )
                    self.default = gr.Checkbox(
                        label="Set default",
                        info=(
                            "Set this LLM as default. This default LLM will be used "
                            "by default across the application."
                        ),
                    )
                    self.btn_new = gr.Button("Add LLM", variant="primary")

                with gr.Column(scale=3):
                    self.spec_desc = gr.Markdown(self.spec_desc_default)

    def _on_app_created(self):
        """Register the page-load handlers once the app exists.

        On load, the LLM table is filled and the vendor dropdown receives the
        vendor names as its choices.
        """
        register_load = self._app.app.load
        register_load(self.list_llms, inputs=[], outputs=[self.llm_list])
        register_load(
            lambda: gr.update(choices=list(llms.vendors())),
            outputs=[self.llm_choices],
        )

    def on_llm_vendor_change(self, vendor):
        """Build the spec template and description for the chosen vendor.

        Args:
            vendor: key of the vendor in ``llms.vendors()``.

        Returns:
            A ``(yaml_text, markdown)`` pair: a YAML mapping holding every
            parameter flagged as required (each set to null), and the vendor's
            formatted description.
        """
        vendor_cls = llms.vendors()[vendor]
        params = vendor_cls.describe()["params"]
        required: dict = {
            key: None for key, meta in params.items() if meta.get("required", False)
        }
        return yaml.dump(required), format_description(vendor_cls)

    def on_register_events(self):
        """Attach event handlers to the components built in on_building_ui."""
        # "Add" tab: choosing a vendor pre-fills the spec and its description.
        self.llm_choices.select(
            self.on_llm_vendor_change,
            inputs=[self.llm_choices],
            outputs=[self.spec, self.spec_desc],
        )
        # "Add" tab: create the LLM, then (only on success) refresh the table
        # and reset the form to its initial values.
        self.btn_new.click(
            self.create_llm,
            inputs=[self.name, self.llm_choices, self.spec, self.default],
            outputs=[],
        ).success(self.list_llms, inputs=[], outputs=[self.llm_list]).success(
            lambda: ("", None, "", False, self.spec_desc_default),
            outputs=[
                self.name,
                self.llm_choices,
                self.spec,
                self.default,
                self.spec_desc,
            ],
        )
        # "View" tab: selecting a table row stores the LLM name in the hidden
        # textbox; the textbox's change event then drives the detail panel.
        self.llm_list.select(
            self.select_llm,
            inputs=self.llm_list,
            outputs=[self.selected_llm_name],
            show_progress="hidden",
        )
        self.selected_llm_name.change(
            self.on_selected_llm_change,
            inputs=[self.selected_llm_name],
            outputs=[
                self._selected_panel,
                self._selected_panel_btn,
                # delete section
                self.btn_delete,
                self.btn_delete_yes,
                self.btn_delete_no,
                # edit section
                self.edit_name,
                self.edit_spec,
                self.edit_spec_desc,
                self.edit_default,
            ],
            show_progress="hidden",
        ).success(lambda: gr.update(value=""), outputs=[self.connection_logs])

        # Delete flow: "Delete" swaps itself for "Confirm Delete" / "Cancel".
        self.btn_delete.click(
            self.on_btn_delete_click,
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        self.btn_delete_yes.click(
            self.delete_llm,
            inputs=[self.selected_llm_name],
            outputs=[self.selected_llm_name],
            show_progress="hidden",
        ).then(
            self.list_llms,
            inputs=[],
            outputs=[self.llm_list],
        )
        # "Cancel" restores the plain "Delete" button.
        self.btn_delete_no.click(
            lambda: (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ),
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        # Save writes the edits and returns the name to keep selected; the
        # table is refreshed afterwards.
        self.btn_edit_save.click(
            self.save_llm,
            inputs=[
                self.selected_llm_name,
                self.edit_name,
                self.edit_default,
                self.edit_spec,
            ],
            outputs=[self.selected_llm_name],
            show_progress="hidden",
        ).then(
            self.list_llms,
            inputs=[],
            outputs=[self.llm_list],
        )
        # Close clears the selection.
        self.btn_close.click(
            lambda: "",
            outputs=[self.selected_llm_name],
        )

        # Connection test uses the spec currently in the edit box.
        self.btn_test_connection.click(
            self.check_connection,
            inputs=[self.selected_llm_name, self.edit_spec],
            outputs=[self.connection_logs],
        )

    def create_llm(self, name, choices, spec, default):
        """Register a new LLM from the values of the "Add" form.

        Args:
            name: name of the new LLM; surrounding whitespace is stripped.
            choices: key of the selected vendor in ``llms.vendors()``.
            spec: YAML text describing the LLM parameters; it must parse to a
                mapping.
            default: whether the new LLM becomes the default one.

        Raises:
            gr.Error: when no vendor is selected, the spec is not a YAML
                mapping, or the manager rejects the new entry.
        """
        try:
            name = name.strip()

            # Fail with a readable message instead of a bare KeyError when the
            # vendor dropdown is still empty.
            vendor_cls = llms.vendors().get(choices)
            if vendor_cls is None:
                raise ValueError("Please select an LLM vendor")

            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)
            # An empty textbox parses to None and a scalar/list is not a spec;
            # both would otherwise fail below with an opaque TypeError.
            if not isinstance(spec, dict):
                raise ValueError(
                    "Specification must be a YAML mapping (use {} for an empty spec)"
                )
            spec["__type__"] = f"{vendor_cls.__module__}.{vendor_cls.__qualname__}"

            llms.add(name, spec=spec, default=default)
            gr.Info(f"LLM '{name}' created successfully")
        except ValueError as e:
            raise gr.Error(str(e)) from e
        except Exception as e:
            raise gr.Error(f"Failed to create LLM '{name}': {e}") from e

    def list_llms(self):
        """Return a DataFrame with one name/vendor/default row per LLM.

        The vendor column is the last dotted component of the spec's
        ``__type__`` ("-" when absent). When there is no LLM at all, a single
        placeholder row of "-" values is returned.
        """
        records = [
            {
                "name": entry["name"],
                "vendor": entry["spec"].get("__type__", "-").split(".")[-1],
                "default": entry["default"],
            }
            for entry in llms.info().values()
        ]
        if not records:
            records = [{"name": "-", "vendor": "-", "default": "-"}]

        return pd.DataFrame.from_records(records)

    def select_llm(self, llm_list, ev: gr.SelectData):
        """Translate a click on the LLM table into the selected LLM name.

        Returns "" when the placeholder row is clicked (after informing the
        user) or when the event is a de-selection; otherwise the ``name``
        value of the clicked row.
        """
        clicked_placeholder = ev.value == "-" and ev.index[0] == 0
        if clicked_placeholder:
            gr.Info("No LLM is loaded. Please add LLM first")

        if clicked_placeholder or not ev.selected:
            return ""

        row_idx = ev.index[0]
        return llm_list["name"][row_idx]

    def on_selected_llm_change(self, selected_llm_name):
        """Compute the detail-panel updates for the current selection.

        With an empty selection the panel is hidden and the edit fields are
        cleared; otherwise the panel is shown and filled from the stored LLM
        info. In both cases the delete buttons go back to their initial state.

        Returns:
            A 9-tuple matching the outputs wired to the selection change
            event: panel, button row, delete, confirm-delete, cancel-delete,
            name, spec, spec description, default flag.
        """
        has_selection = selected_llm_name != ""

        panel = gr.update(visible=has_selection)
        panel_btn = gr.update(visible=has_selection)
        # The delete confirmation is reset regardless of the selection.
        delete_btn = gr.update(visible=True)
        confirm_btn = gr.update(visible=False)
        cancel_btn = gr.update(visible=False)

        if has_selection:
            # Work on a copy: "__type__" is popped so it is not shown for editing.
            info = deepcopy(llms.info()[selected_llm_name])
            vendor_key = info["spec"].pop("__type__", "-").split(".")[-1]
            vendor_cls = llms.vendors()[vendor_key]

            name_value = selected_llm_name
            spec_value = yaml.dump(info["spec"])
            desc_value = format_description(vendor_cls)
            default_value = info["default"]
        else:
            name_value = gr.update(value="")
            spec_value = gr.update(value="")
            desc_value = gr.update(value="")
            default_value = gr.update(value=False)

        return (
            panel,
            panel_btn,
            delete_btn,
            confirm_btn,
            cancel_btn,
            name_value,
            spec_value,
            desc_value,
            default_value,
        )

    def on_btn_delete_click(self):
        """Hide "Delete" and reveal the "Confirm Delete" / "Cancel" buttons."""
        return (
            gr.update(visible=False),  # btn_delete
            gr.update(visible=True),  # btn_delete_yes
            gr.update(visible=True),  # btn_delete_no
        )

    def check_connection(self, selected_llm_name: str, selected_spec):
        """Test the selected LLM by sending it a short message.

        This is a generator: after every step it yields the accumulated HTML
        log so the UI can show progress.

        Args:
            selected_llm_name: name of the stored LLM to test.
            selected_spec: YAML text from the edit box; it is overlaid on the
                stored spec, so unsaved edits are part of the test.

        Yields:
            The HTML log so far. Failures are reported in the log rather than
            raised.
        """
        # Local import: only this method needs it.
        from html import escape

        log_content: str = ""

        try:
            # Everything interpolated into the log is HTML-escaped because the
            # log is rendered as raw HTML and these values are not ours.
            log_content += f"- Testing model: {escape(str(selected_llm_name))}<br>"
            yield log_content

            # Start from a copy of the stored info (keeps "__type__")
            info = deepcopy(llms.info()[selected_llm_name])

            # Overlay the spec currently being edited
            spec = yaml.load(selected_spec, Loader=YAMLNoDateSafeLoader)
            if not isinstance(spec, dict):
                # An empty box parses to None; report it clearly.
                raise ValueError("Specification must be a YAML mapping")
            info["spec"].update(spec)

            llm = deserialize(info["spec"], safe=False)

            if llm is None:
                raise Exception(f"Can not found model: {selected_llm_name}")

            log_content += "- Sending a message `Hi`<br>"
            yield log_content
            respond = llm("Hi")

            log_content += (
                f"<mark style='background: green; color: white'>- Connection success. "
                f"Got response:\n {escape(str(respond))}</mark><br>"
            )
            yield log_content

            gr.Info(f"LLM {selected_llm_name} connect successfully")
        except Exception as e:
            log_content += (
                f"<mark style='color: yellow; background: red'>- Connection failed. "
                f"Got error:\n {escape(str(e))}</mark>"
            )
            yield log_content

        return log_content

    def save_llm(self, selected_llm_name, edit_name, default, spec):
        """Save the edits made to the selected LLM, optionally renaming it.

        Args:
            selected_llm_name: current name of the LLM being edited.
            edit_name: content of the name box; surrounding whitespace is
                stripped. A blank value keeps the current name.
            default: whether the LLM becomes the default one.
            spec: YAML text of the edited spec; it must parse to a mapping.
                The stored ``__type__`` is re-attached before saving.

        Returns:
            The name the LLM has after saving, so it stays selected.

        Raises:
            gr.Error: when the spec is not a YAML mapping or the update fails.
        """
        try:
            new_name = edit_name.strip()
            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)
            # An empty textbox parses to None; fail with a readable message
            # rather than an opaque TypeError on the assignment below.
            if not isinstance(spec, dict):
                raise ValueError(
                    "Specification must be a YAML mapping (use {} for an empty spec)"
                )
            spec["__type__"] = llms.info()[selected_llm_name]["spec"]["__type__"]
            llms.update(
                selected_llm_name, spec=spec, default=default, new_name=new_name
            )
            # The manager only renames when new_name is non-empty, so a blank
            # name box means the LLM kept its current name. Returning "" here
            # would report "LLM '' saved" and drop the selection.
            final_name = new_name or selected_llm_name
            gr.Info(f"LLM '{final_name}' saved successfully")
            return final_name
        except ValueError as e:
            raise gr.Error(str(e)) from e
        except Exception as e:
            raise gr.Error(f"Failed to save LLM '{selected_llm_name}': {e}") from e

    def delete_llm(self, selected_llm_name):
        """Delete the selected LLM.

        Args:
            selected_llm_name: name of the LLM to delete.

        Returns:
            "" on success, which clears the selection.

        Raises:
            gr.Error: when the deletion fails. The selection output is then
                left untouched, so the LLM stays selected.
        """
        try:
            llms.delete(selected_llm_name)
        except Exception as e:
            # gr.Error is an exception: it only reaches the user when raised.
            # Merely constructing it silently discarded the failure.
            raise gr.Error(f"Failed to delete LLM {selected_llm_name}: {e}") from e

        return ""


================================================
FILE: libs/ktem/ktem/main.py
================================================
import gradio as gr
from decouple import config
from ktem.app import BaseApp
from ktem.pages.chat import ChatPage
from ktem.pages.help import HelpPage
from ktem.pages.resources import ResourcesTab
from ktem.pages.settings import SettingsPage
from ktem.pages.setup import SetupPage
from theflow.settings import settings as flowsettings

# Feature flags read from flowsettings; each falls back to the default given
# here when the setting is not defined.
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
KH_ENABLE_FIRST_SETUP = getattr(flowsettings, "KH_ENABLE_FIRST_SETUP", False)
KH_APP_DATA_EXISTS = getattr(flowsettings, "KH_APP_DATA_EXISTS", True)

# override first setup setting
# A truthy KH_FIRST_SETUP config value marks the app data as missing, which
# makes toggle_first_setup_visibility() show the setup page on its first call
# (unless demo mode is on).
if config("KH_FIRST_SETUP", default=False, cast=bool):
    KH_APP_DATA_EXISTS = False


def toggle_first_setup_visibility():
    """Return visibility updates for the setup page wrapper and the main tabs.

    The setup page is shown only when the app is not in demo mode and no app
    data exists yet. Calling this function marks the app data as existing
    (module-level flag), so every later call shows the tabs instead.

    Returns:
        A pair of updates: (setup page wrapper, main tabs).
    """
    global KH_APP_DATA_EXISTS

    show_setup = not (KH_DEMO_MODE or KH_APP_DATA_EXISTS)
    KH_APP_DATA_EXISTS = True

    setup_update = gr.update(visible=show_setup)
    tabs_update = gr.update(visible=not show_setup)
    return setup_update, tabs_update


class App(BaseApp):
    """The main app of Kotaemon

    The main application contains app-level information:
        - setting state
        - user id

    App life-cycle:
        - Render
        - Declare public events
        - Subscribe public events
        - Register events
    """

    def ui(self):
        """Render the UI

        Builds the top-level tabs and records every created tab in
        ``self._tabs`` (keyed by tab name) so its visibility can be toggled
        later. When user management is on, all tabs except the login tab
        start hidden.
        """
        self._tabs = {}

        with gr.Tabs() as self.tabs:
            # Login tab only exists when user management is enabled.
            if self.f_user_management:
                from ktem.pages.login import LoginPage

                with gr.Tab(
                    "Welcome", elem_id="login-tab", id="login-tab"
                ) as self._tabs["login-tab"]:
                    self.login_page = LoginPage(self)

            with gr.Tab(
                "Chat",
                elem_id="chat-tab",
                id="chat-tab",
                visible=not self.f_user_management,
            ) as self._tabs["chat-tab"]:
                self.chat_page = ChatPage(self)

            # A single index gets its own top-level tab named after the index;
            # several indices are grouped as sub-tabs under one "Files" tab.
            if len(self.index_manager.indices) == 1:
                for index in self.index_manager.indices:
                    with gr.Tab(
                        f"{index.name}",
                        elem_id="indices-tab",
                        elem_classes=[
                            "fill-main-area-height",
                            "scrollable",
                            "indices-tab",
                        ],
                        id="indices-tab",
                        visible=not self.f_user_management and not KH_DEMO_MODE,
                    ) as self._tabs[f"{index.id}-tab"]:
                        page = index.get_index_page_ui()
                        setattr(self, f"_index_{index.id}", page)
            elif len(self.index_manager.indices) > 1:
                with gr.Tab(
                    "Files",
                    elem_id="indices-tab",
                    elem_classes=["fill-main-area-height", "scrollable", "indices-tab"],
                    id="indices-tab",
                    visible=not self.f_user_management and not KH_DEMO_MODE,
                ) as self._tabs["indices-tab"]:
                    for index in self.index_manager.indices:
                        with gr.Tab(
                            index.name,
                            elem_id=f"{index.id}-tab",
                        ) as self._tabs[f"{index.id}-tab"]:
                            page = index.get_index_page_ui()
                            setattr(self, f"_index_{index.id}", page)

            # Resources and Settings are not built in demo mode; Resources is
            # additionally skipped when SSO is enabled.
            if not KH_DEMO_MODE:
                if not KH_SSO_ENABLED:
                    with gr.Tab(
                        "Resources",
                        elem_id="resources-tab",
                        id="resources-tab",
                        visible=not self.f_user_management,
                        elem_classes=["fill-main-area-height", "scrollable"],
                    ) as self._tabs["resources-tab"]:
                        self.resources_page = ResourcesTab(self)

                with gr.Tab(
                    "Settings",
                    elem_id="settings-tab",
                    id="settings-tab",
                    visible=not self.f_user_management,
                    elem_classes=["fill-main-area-height", "scrollable"],
                ) as self._tabs["settings-tab"]:
                    self.settings_page = SettingsPage(self)

            with gr.Tab(
                "Help",
                elem_id="help-tab",
                id="help-tab",
                visible=not self.f_user_management,
                elem_classes=["fill-main-area-height", "scrollable"],
            ) as self._tabs["help-tab"]:
                self.help_page = HelpPage(self)

        # The first-setup page lives outside the tabs, in a wrapper that starts
        # hidden.
        if KH_ENABLE_FIRST_SETUP:
            with gr.Column(visible=False) as self.setup_page_wrapper:
                self.setup_page = SetupPage(self)

    def on_subscribe_public_events(self):
        """Subscribe the app-level handlers to the public events.

        - With user management enabled, ``onSignIn`` and ``onSignOut`` toggle
          tab visibility according to the signed-in user.
        - With first setup enabled, ``onFirstSetupComplete`` switches from the
          setup page to the main tabs.
        """
        if self.f_user_management:
            from ktem.db.engine import engine
            from ktem.db.models import User
            from sqlmodel import Session, select

            def show_login_only():
                """Updates that leave only the login tab visible and select it.

                One update per tab plus one for the tab container, matching
                the outputs declared below.
                """
                updates = [
                    gr.update(visible=(k == "login-tab")) for k in self._tabs.keys()
                ]
                updates.append(gr.update(selected="login-tab"))
                return updates

            def toggle_login_visibility(user_id):
                if not user_id:
                    return show_login_only()

                with Session(engine) as session:
                    user = session.exec(select(User).where(User.id == user_id)).first()
                    if user is None:
                        # Unknown user: behave as signed out. This branch used
                        # to omit the update for `self.tabs`, returning one
                        # value fewer than the declared outputs.
                        return show_login_only()

                    is_admin = user.admin

                tabs_update = []
                for k in self._tabs.keys():
                    if k == "login-tab":
                        tabs_update.append(gr.update(visible=False))
                    elif k == "resources-tab":
                        # Resources tab is restricted to admins.
                        tabs_update.append(gr.update(visible=is_admin))
                    else:
                        tabs_update.append(gr.update(visible=True))

                tabs_update.append(gr.update(selected="chat-tab"))

                return tabs_update

            self.subscribe_event(
                name="onSignIn",
                definition={
                    "fn": toggle_login_visibility,
                    "inputs": [self.user_id],
                    "outputs": list(self._tabs.values()) + [self.tabs],
                    "show_progress": "hidden",
                },
            )

            self.subscribe_event(
                name="onSignOut",
                definition={
                    "fn": toggle_login_visibility,
                    "inputs": [self.user_id],
                    "outputs": list(self._tabs.values()) + [self.tabs],
                    "show_progress": "hidden",
                },
            )

        if KH_ENABLE_FIRST_SETUP:
            self.subscribe_event(
                name="onFirstSetupComplete",
                definition={
                    "fn": toggle_first_setup_visibility,
                    "inputs": [],
                    "outputs": [self.setup_page_wrapper, self.tabs],
                    "show_progress": "hidden",
                },
            )

    def _on_app_created(self):
        """Register the page-load handler for the first-setup page.

        Nothing is registered when first setup is disabled.
        """
        if not KH_ENABLE_FIRST_SETUP:
            return

        setup_outputs = [self.setup_page_wrapper, self.tabs]
        self.app.load(
            toggle_first_setup_visibility,
            inputs=[],
            outputs=setup_outputs,
        )


================================================
FILE: libs/ktem/ktem/mcp/__init__.py
================================================
# MCP (Model Context Protocol) integration for kotaemon


================================================
FILE: libs/ktem/ktem/mcp/db.py
================================================
from ktem.db.engine import engine
from sqlalchemy import JSON, Column, String
from sqlalchemy import inspect as sa_inspect
from sqlalchemy.orm import DeclarativeBase


class Base(DeclarativeBase):
    """Declarative base class for the tables defined in this module."""


class BaseMCPTable(Base):
    """Base table to store MCP server configurations"""

    # Abstract: no table is created for this class, only for its subclasses.
    __abstract__ = True

    # Server name; also the primary key.
    name = Column(String, primary_key=True, unique=True)
    # Full JSON config for the MCP server. The default is the callable `dict`
    # so every row gets a fresh empty dict rather than all rows sharing one
    # mutable `{}` object.
    config = Column(JSON, default=dict)


class MCPTable(BaseMCPTable):
    """Concrete MCP server configuration table, stored as ``mcp_table``."""

    __tablename__ = "mcp_table"


# Drop and recreate to handle schema changes from old multi-column layout.
# Runs at import time. An existing "mcp_table" without a "config" column is
# dropped, which discards its rows; it is then recreated below with the
# current layout.
_inspector = sa_inspect(engine)
if _inspector.has_table("mcp_table"):
    _columns = {col["name"] for col in _inspector.get_columns("mcp_table")}
    if "config" not in _columns:
        MCPTable.__table__.drop(engine)  # type: ignore[attr-defined]

# Creates the table when it does not exist (first run or right after the drop).
MCPTable.metadata.create_all(engine)


================================================
FILE: libs/ktem/ktem/mcp/manager.py
================================================
"""Manager for MCP server configurations.

Provides CRUD operations on the MCPTable.
All tool building/discovery logic lives in kotaemon.agents.tools.mcp.
"""

import logging

from sqlalchemy import select
from sqlalchemy.orm import Session

from .db import MCPTable, engine

logger = logging.getLogger(__name__)


class MCPManager:
    """Manages MCP server configurations stored in the database."""

    def __init__(self):
        self._configs: dict[str, dict] = {}
        self.load()

    def load(self):
        """Reload configurations from the database."""
        self._info = {}
        with Session(engine) as session:
            stmt = select(MCPTable)
            items = session.execute(stmt)
            for (item,) in items:
                self._info[item.name] = {
                    "name": item.name,
                    "config": item.config,
                }

    def info(self) -> dict:
        """Return all MCP server configurations."""
        return self._info

    def get(self, name: str) -> dict | None:
        """Get a single configuration by name."""
        return self._info.get(name)

    def add(self, name: str, config: dict):
        """Add a new MCP server configuration."""
        name = name.strip()
        if not name:
            raise ValueError("Name must not be empty")

        with Session(engine) as session:
            item = MCPTable(name=name, config=config)
            session.add(item)
            session.commit()

        self.load()

    def update(self, name: str, config: dict):
        """Update an existing MCP server configuration."""
        if not name:
            raise ValueError("Name must not be empty")

        with Session(engine) as session:
            item = session.query(MCPTable).filter_by(name=name).first()
            if not item:
                raise ValueError(f"MCP server '{name}' not found")
            item.config = config  # type: ignore[assignment]
            session.commit()

        self.load()

    def delete(self, name: str):
        """Delete an MCP server configuration."""
        with Session(engine) as session:
            item = session.query(MCPTable).filter_by(name=name).first()
            if item:
                session.delete(item)
                session.commit()

        self.load()

    def get_enabled_tools(self) -> list[str]:
        """Return tool choice names for all MCP servers."""
        choices = []
        for name, entry in self._info.items():
            config = entry.get("config", {})
            enabled_tools = config.get("enabled_tools", None)
            if enabled_tools is not None:
                choices.append(f"[MCP] {name}")
        return choices


# Module-level manager instance. Creating it calls MCPManager.load(), so the
# database is queried when this module is imported.
mcp_manager = MCPManager()


================================================
FILE: libs/ktem/ktem/mcp/ui.py
================================================
import json
import logging

import gradio as gr
import pandas as pd
from ktem.app import BasePage

from kotaemon.agents.tools.mcp import discover_tools_info, format_tool_list

from .manager import mcp_manager

logger = logging.getLogger(__name__)

# Markdown placeholder prompting the user to select or add an MCP server.
TOOLS_DEFAULT = "# Available Tools\n\nSelect or add an MCP server to view its tools."

# Name of the top-level key used in the JSON configuration (see EXAMPLE_CONFIG).
MCP_SERVERS_KEY = "mcpServers"

# JSON skeleton with an empty "mcpServers" object.
# NOTE(review): presumably shown as the starting content of a config editor;
# its use is outside this part of the file - confirm.
EXAMPLE_CONFIG = """{
  "mcpServers": {
  }
}"""


class MCPManagement(BasePage):
    def __init__(self, app):
        """Keep a handle on the application and build this page's components.

        Args:
            app: Application object; ``_on_app_created`` later uses ``app.app``
                to register a load event.
        """
        self._app = app
        # Components are created right away, inside whatever Gradio context is
        # active when this page is instantiated
        self.on_building_ui()

    def on_building_ui(self):
        """Create the "View" and "Add" tabs and keep their components on ``self``.

        The "View" tab holds the server table plus an initially hidden detail
        panel (JSON editor, Save/Delete/Close buttons, tools pane). The "Add" tab
        holds a JSON editor pre-filled with ``EXAMPLE_CONFIG``, the add button and
        its own tools pane.
        """
        with gr.Tab(label="View"):
            # Read-only table of configured servers
            self.mcp_list = gr.DataFrame(
                headers=["name", "config"],
                interactive=False,
                column_widths=[30, 70],
            )

            # Detail panel for the selected server; hidden until one is selected
            with gr.Column(visible=False) as self._selected_panel:
                # Invisible textbox holding the selected server's name; its
                # change event drives the rest of the panel
                self.selected_mcp_name = gr.Textbox(value="", visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_config = gr.Code(
                            label="Configuration (JSON)",
                            language="json",
                            lines=10,
                        )

                        with gr.Row(visible=False) as self._selected_panel_btn:
                            with gr.Column():
                                self.btn_edit_save = gr.Button(
                                    "Save", min_width=10, variant="primary"
                                )
                            with gr.Column():
                                self.btn_delete = gr.Button(
                                    "Delete", min_width=10, variant="stop"
                                )
                                # Confirmation pair, hidden until "Delete" is clicked
                                with gr.Row():
                                    self.btn_delete_yes = gr.Button(
                                        "Confirm Delete",
                                        variant="stop",
                                        visible=False,
                                        min_width=10,
                                    )
                                    self.btn_delete_no = gr.Button(
                                        "Cancel", visible=False, min_width=10
                                    )
                            with gr.Column():
                                self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_tools_display = gr.Markdown(TOOLS_DEFAULT)

        with gr.Tab(label="Add"):
            with gr.Row():
                with gr.Column(scale=2):
                    self.config = gr.Code(
                        label="Configuration (JSON)",
                        language="json",
                        lines=10,
                        value=EXAMPLE_CONFIG,
                    )
                    gr.HTML(
                        "<br/>"
                    )  # Spacer so gr.Code overflow does not interfere with clicking the button
                    with gr.Row():
                        self.btn_new = gr.Button("Add MCP Servers", variant="primary")

                with gr.Column(scale=3):
                    self.add_tools_display = gr.Markdown(TOOLS_DEFAULT)

    def _on_app_created(self):
        """Register a load event that fills the server table via ``list_servers``."""
        self._app.app.load(
            self.list_servers,
            inputs=[],
            outputs=[self.mcp_list],
        )

    def on_register_events(self):
        """Wire the page's components to their handler methods.

        Covers five flows: adding servers, selecting a table row, deleting (with a
        confirm/cancel step), saving edits, and closing the detail panel.
        """
        # Add new server — save first, then fetch tools async
        # The table refresh is chained with .success(), so it only runs when
        # create_server did not raise.
        self.btn_new.click(
            self.create_server,
            inputs=[self.config],
            outputs=[self.add_tools_display],
        ).success(self.list_servers, inputs=[], outputs=[self.mcp_list]).then(
            self.fetch_tools_for_add,
            inputs=[self.config],
            outputs=[self.add_tools_display],
        ).then(
            # Last step: put the example config back into the editor
            lambda: EXAMPLE_CONFIG,
            outputs=[self.config],
        )

        # Select a server from list
        # The row click only writes the hidden name textbox; that textbox's change
        # event below updates the panel.
        self.mcp_list.select(
            self.select_server,
            inputs=self.mcp_list,
            outputs=[self.selected_mcp_name],
            show_progress="hidden",
        )
        self.selected_mcp_name.change(
            self.on_selected_server_change,
            inputs=[self.selected_mcp_name],
            # Order must match the tuple returned by on_selected_server_change
            outputs=[
                self._selected_panel,
                self._selected_panel_btn,
                self.btn_delete,
                self.btn_delete_yes,
                self.btn_delete_no,
                self.edit_config,
                self.edit_tools_display,
            ],
            show_progress="hidden",
        ).then(
            self.fetch_tools_for_view,
            inputs=[self.selected_mcp_name],
            outputs=[self.edit_tools_display],
        )

        # Delete flow
        # "Delete" swaps itself for the confirm/cancel pair.
        self.btn_delete.click(
            self.on_btn_delete_click,
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        # delete_server's return value is written to the hidden name textbox
        self.btn_delete_yes.click(
            self.delete_server,
            inputs=[self.selected_mcp_name],
            outputs=[self.selected_mcp_name],
            show_progress="hidden",
        ).then(self.list_servers, inputs=[], outputs=[self.mcp_list])
        # "Cancel" restores the plain "Delete" button
        self.btn_delete_no.click(
            lambda: (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ),
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )

        # Save edits — save first, then refresh tools
        self.btn_edit_save.click(
            self.save_server,
            inputs=[self.selected_mcp_name, self.edit_config],
            outputs=[self.edit_tools_display],
            show_progress="hidden",
        ).then(self.list_servers, inputs=[], outputs=[self.mcp_list]).then(
            self.fetch_tools_for_view,
            inputs=[self.selected_mcp_name],
            outputs=[self.edit_tools_display],
        )

        # Close panel
        # Writes an empty name into the hidden textbox
        self.btn_close.click(lambda: "", outputs=[self.selected_mcp_name])

    # --- Handlers ---

    def _fetch_tools_markdown(self, config: dict) -> str:
        """Discover one MCP server's tools and format them for a tools pane.

        Args:
            config: Configuration of a single MCP server. Its optional
                ``enabled_tools`` entry is forwarded to ``format_tool_list``.

        Returns:
            The text produced by ``format_tool_list``, or a
            "❌ Failed to fetch tools: ..." message when discovery or formatting
            raised. This method itself never raises.
        """
        try:
            tool_infos = discover_tools_info(config)
            enabled_tools = config.get("enabled_tools", None)
            return format_tool_list(tool_infos, enabled_tools)
        except Exception as e:
            # UI boundary: the pane shows a short message, so keep the traceback
            # in the logs instead of losing it. The config is deliberately not
            # logged, since it may carry credentials.
            logger.exception("Failed to fetch MCP tools")
            return f"❌ Failed to fetch tools: {e}"

    def create_server(self, config_str):
        """Validate the "Add" tab JSON and register every new MCP server in it.

        The whole document is validated before anything is stored, so a
        malformed entry never leaves a partial import behind. Servers whose
        name already exists, or whose creation fails, are skipped with a
        warning while the remaining ones are still processed.

        Args:
            config_str: JSON text shaped like ``{"mcpServers": {name: config}}``.

        Returns:
            Markdown with one "Fetching tools..." placeholder per created
            server, or ``TOOLS_DEFAULT`` when nothing was created.

        Raises:
            gr.Error: If the text is not valid JSON, the root key is missing,
                a server name is blank, or a server config is not a JSON object.
        """
        try:
            configs = json.loads(config_str)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a non-string value (e.g. None from an empty editor)
            raise gr.Error(f"Invalid JSON: {e}") from e

        if not isinstance(configs, dict) or MCP_SERVERS_KEY not in configs:
            raise gr.Error(
                f"Config must be a dictionary with '{MCP_SERVERS_KEY}' root key."
            )

        mcp_servers = configs[MCP_SERVERS_KEY]
        if not isinstance(mcp_servers, dict):
            raise gr.Error(
                f"'{MCP_SERVERS_KEY}' must be a mapping of server names to configs."
            )

        # Validate every entry before processing any of them
        for raw_name, config in mcp_servers.items():
            name = raw_name.strip()
            if not name:
                raise gr.Error("Server names cannot be empty.")
            if not isinstance(config, dict):
                # Code elsewhere calls config.get(...), so only objects are usable
                raise gr.Error(f"Config for MCP server '{name}' must be a JSON object.")

        success_count = 0
        msgs = []
        for raw_name, config in mcp_servers.items():
            name = raw_name.strip()
            if name in mcp_manager.info():
                gr.Warning(f"MCP server '{name}' already exists. Skipping.")
                continue

            try:
                mcp_manager.add(name, config)
            except Exception as e:
                # One bad server must not abort the rest of the batch
                gr.Warning(f"Failed to create MCP server '{name}': {e}")
                continue

            success_count += 1
            msgs.append(f"# Tools for '{name}'\n\n⏳ Fetching tools...")

        if success_count > 0:
            gr.Info(f"{success_count} MCP server(s) created successfully")

        if not msgs:
            return TOOLS_DEFAULT

        return "\n\n".join(msgs)

    def fetch_tools_for_add(self, config_str):
        """Build the "Add" tab tools listing for every server in ``config_str``.

        Runs as a chained step after ``create_server``. Problems with the JSON
        are reported as "❌ ..." text in the returned value rather than raised.

        Args:
            config_str: JSON text from the "Add" tab editor.

        Returns:
            ``TOOLS_DEFAULT`` for empty input, an error line for malformed
            input, otherwise one "# Tools for '<name>'" section per server.
        """
        if not config_str:
            return TOOLS_DEFAULT

        try:
            parsed = json.loads(config_str)
        except json.JSONDecodeError:
            return "❌ Invalid JSON config"

        if not (isinstance(parsed, dict) and MCP_SERVERS_KEY in parsed):
            return f"❌ Config must be a dictionary with '{MCP_SERVERS_KEY}' root key"

        servers = parsed[MCP_SERVERS_KEY]
        if not isinstance(servers, dict):
            return f"❌ '{MCP_SERVERS_KEY}' must be a dictionary"

        return "\n\n".join(
            f"# Tools for '{server_name.strip()}'\n\n"
            f"{self._fetch_tools_markdown(server_config)}"
            for server_name, server_config in servers.items()
        )

    def fetch_tools_for_view(self, selected_name):
        """Build the "View" tab tools listing for the selected server.

        Args:
            selected_name: Name held by the hidden selection textbox.

        Returns:
            ``TOOLS_DEFAULT`` when nothing is selected or the name is unknown to
            the manager, otherwise a "# Tools for '<name>'" section.
        """
        # The manager is only consulted when there is a name to look up
        entry = mcp_manager.info().get(selected_name) if selected_name else None
        if not entry:
            return TOOLS_DEFAULT

        listing = self._fetch_tools_markdown(entry.get("config", {}))
        return f"# Tools for '{selected_name}'\n\n{listing}"

    def list_servers(self):
        """Return the configured servers as a two-column DataFrame.

        Each row holds the server name and its config serialized as JSON. When
        no server exists, a single placeholder row of dashes is returned.
        """
        rows = [
            {"name": entry["name"], "config": json.dumps(entry.get("config", {}))}
            for entry in mcp_manager.info().values()
        ]
        if not rows:
            rows = [{"name": "-", "config": "-"}]
        return pd.DataFrame.from_records(rows)

    def select_server(self, mcp_list, ev: gr.SelectData):
        """Translate a click on the server table into a server name.

        Args:
            mcp_list: Current table content, indexable by column name then row.
            ev: Gradio selection event for the clicked cell.

        Returns:
            The name in the clicked row, or ``""`` when the placeholder row was
            clicked (an info toast is shown) or the event is a deselection.
        """
        clicked_placeholder = ev.value == "-" and ev.index[0] == 0
        if clicked_placeholder:
            gr.Info("No MCP server configured. Please add one first.")
            return ""

        return mcp_list["name"][ev.index[0]] if ev.selected else ""

    def on_selected_server_change(self, selected_name):
        """Show or hide the detail panel to match the current selection.

        A name the manager does not know (for example a server that was removed
        after the table was rendered) is treated like an empty selection rather
        than raising ``KeyError``; ``fetch_tools_for_view`` tolerates unknown
        names in the same way.

        Args:
            selected_name: Value of the hidden selection textbox.

        Returns:
            A 7-tuple of updates, in this order: panel, button row, delete
            button, confirm-delete button, cancel button, config editor and
            tools pane.
        """
        entry = mcp_manager.info().get(selected_name) if selected_name else None
        if entry is None:
            return (
                gr.update(visible=False),  # panel
                gr.update(visible=False),  # buttons
                gr.update(visible=True),  # delete
                gr.update(visible=False),  # delete_yes
                gr.update(visible=False),  # delete_no
                gr.update(value="{}"),  # config
                gr.update(value=TOOLS_DEFAULT),  # tools display
            )

        config = entry.get("config", {})
        config_str = json.dumps(config, indent=2)

        return (
            gr.update(visible=True),  # panel
            gr.update(visible=True),  # buttons
            gr.update(visible=True),  # delete
            gr.update(visible=False),  # delete_yes
            gr.update(visible=False),  # delete_no
            gr.update(value=config_str),  # config
            # Placeholder only; the tool listing is produced by a later handler
            gr.update(value=f"# Tools for '{selected_name}'\n\n⏳ Fetching tools..."),
        )

    def on_btn_delete_click(self):
        """Swap the "Delete" button for the confirm/cancel pair.

        Returns:
            Three visibility updates, in order: delete (hidden), confirm-delete
            (shown), cancel (shown).
        """
        visibility = (False, True, True)
        return tuple(gr.update(visible=flag) for flag in visibility)

    def delete_server(self, selected_name):
        """Delete the selected MCP server.

        Args:
            selected_name: Name of the server to delete.

        Returns:
            ``""``, the new value for the hidden selection textbox, once the
            server has been deleted.

        Raises:
            gr.Error: If the manager fails to delete the server. Gradio only
                displays ``gr.Error`` when it is raised (unlike ``gr.Info`` and
                ``gr.Warning``, which are plain calls), so building it without
                ``raise`` hid the failure from the user. Raising leaves the
                handler's output untouched, so the selection is kept.
        """
        try:
            mcp_manager.delete(selected_name)
        except Exception as e:
            raise gr.Error(
                f"Failed to delete MCP server '{selected_name}': {e}"
            ) from e

        gr.Info(f"MCP server '{selected_name}' deleted successfully")
        return ""

    def save_server(self, selected_name, config_str):
        """Parse the edited JSON and store it as the selected server's config.

        Args:
            selected_name: Name of the server being edited.
            config_str: JSON text from the "View" tab editor.

        Returns:
            A "Refreshing tools..." Markdown placeholder for the tools pane.

        Raises:
            gr.Error: If the text is not valid JSON, is not a JSON object, or
                the manager rejects the update.
        """
        try:
            config = json.loads(config_str)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a non-string value (e.g. None from an empty editor)
            raise gr.Error(f"Invalid JSON: {e}") from e

        if not isinstance(config, dict):
            # The manager's update() expects a dict; a list or scalar would be
            # persisted and then break every later config.get(...) lookup
            raise gr.Error("Config must be a JSON object.")

        try:
            mcp_manager.update(selected_name, config)
        except Exception as e:
            raise gr.Error(
                f"Failed to save MCP server '{selected_name}': {e}"
            ) from e

        gr.Info(f"MCP server '{selected_name}' saved successfully")

        # Placeholder only; the tool listing is produced by a later handler
        return f"# Tools for '{selected_name}'\n\n⏳ Refreshing tools..."


================================================
FILE: libs/ktem/ktem/pages/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/pages/chat/__init__.py
================================================
import asyncio
import json
import re
from copy import deepcopy
from typing import Optional

import gradio as gr
from decouple import config
from ktem.app import BasePage
from ktem.components import reasonings
from ktem.db.models import Conversation, engine
from ktem.index.file.ui import File
from ktem.reasoning.prompt_optimization.mindmap import MINDMAP_HTML_EXPORT_TEMPLATE
from ktem.reasoning.prompt_optimization.suggest_conversation_name import (
    SuggestConvNamePipeline,
)
from ktem.reasoning.prompt_optimization.suggest_followup_chat import (
    SuggestFollowupQuesPipeline,
)
from plotly.io import from_json
from sqlmodel import Session, select
from theflow.settings import settings as flowsettings
from theflow.utils.modules import import_dotted_string

from kotaemon.base import Document
from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS
from kotaemon.indices.qa.utils import strip_think_tag

from ...utils import SUPPORTED_LANGUAGE_MAP, get_file_names_regex, get_urls
from ...utils.commands import WEB_SEARCH_COMMAND
from ...utils.hf_papers import get_recommended_papers
from ...utils.rate_limit import check_rate_limit
from .chat_panel import ChatPanel
from .chat_suggestion import ChatSuggestion
from .common import STATE
from .control import ConversationControl
from .demo_hint import HintPage
from .paper_list import PaperListPage
from .report import ReportIssue

# Settings read from flowsettings; each falls back to the given default when unset
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
KH_WEB_SEARCH_BACKEND = getattr(flowsettings, "KH_WEB_SEARCH_BACKEND", None)
# Object imported from the dotted path above; stays None when the setting is
# unset or the import fails
WebSearch = None
if KH_WEB_SEARCH_BACKEND:
    try:
        WebSearch = import_dotted_string(KH_WEB_SEARCH_BACKEND, safe=False)
    except (ImportError, AttributeError) as e:
        # Best effort: the failure is printed and module import continues
        print(f"Error importing {KH_WEB_SEARCH_BACKEND}: {e}")

# Number of leading reasoning choices offered in the dropdown (fewer in demo mode)
REASONING_LIMITS = 2 if KH_DEMO_MODE else 10
DEFAULT_SETTING = "(default)"
# Scale values for the information panel column, looked up with a boolean key
INFO_PANEL_SCALES = {True: 8, False: 4}
# Default question text; the wording differs in demo mode
DEFAULT_QUESTION = (
    "What is the summary of this document?"
    if not KH_DEMO_MODE
    else "What is the summary of this paper?"
)

# Browser-side JS: moves keyboard focus to the textarea inside #chat-input
chat_input_focus_js = """
function() {
    let chatInput = document.querySelector("#chat-input textarea");
    chatInput.focus();
}
"""

# Browser-side JS: dispatches an Enter keypress on the #quick-url-demo textarea
# (the element id the quick-URL textbox gets in demo mode)
quick_urls_submit_js = """
function() {
    let urlInput = document.querySelector("#quick-url-demo textarea");
    console.log("URL input:", urlInput);
    urlInput.dispatchEvent(new KeyboardEvent('keypress', {'key': 'Enter'}));
}
"""

# Browser-side JS: overrides clicks on every link inside #related-papers. A click
# presses #new-conv-button and, 500 ms later, writes the link's href into the
# #quick-url-demo textarea and dispatches an Enter keypress on it.
recommended_papers_js = """
function() {
    // Get all links and attach click event
    var links = document.querySelectorAll("#related-papers a");

    function submitPaper(event) {
        event.preventDefault();
        var target = event.currentTarget;
        var url = target.getAttribute("href");
        console.log("URL:", url);

        let newChatButton = document.querySelector("#new-conv-button");
        newChatButton.click();

        setTimeout(() => {
            let urlInput = document.querySelector("#quick-url-demo textarea");
            // Fill the URL input
            urlInput.value = url;
            urlInput.dispatchEvent(new Event("input", { bubbles: true }));
            urlInput.dispatchEvent(new KeyboardEvent('keypress', {'key': 'Enter'}));
            }, 500
        );
    }

    for (var i = 0; i < links.length; i++) {
        links[i].onclick = submitPaper;
    }
}
"""

# Browser-side JS: removes the "text_selection" CSS class from every bot message
# row under #main-chat-bot
clear_bot_message_selection_js = """
function() {
    var bot_messages = document.querySelectorAll(
        "div#main-chat-bot div.message-row.bot-row"
    );
    bot_messages.forEach(message => {
        message.classList.remove("text_selection");
    });
}
"""

# Browser-side JS run after a chat turn: attaches click handlers to PDF links
# and citation links, renders the mindmap when its script tag is present, and
# (after 250 ms) wires the mindmap text nodes plus the toggle-height and export
# links. The "{html_template}" placeholder is replaced below by
# MINDMAP_HTML_EXPORT_TEMPLATE with newlines removed and double quotes escaped.
# It relies on page-level helpers not defined in this string (fullTextSearch,
# openModal, scrollToCitation, fillChatInput, spawnDocument, markmap).
# NOTE(review): `setTimeout(fullTextSearch(), 100)` calls fullTextSearch right
# away and hands its return value to setTimeout; confirm whether a deferred call
# (`setTimeout(fullTextSearch, 100)`) was intended.
# NOTE(review): `markmap_div_html` is assigned without a declaration and only
# when the mindmap script exists, yet it is read later inside the timeout.
pdfview_js = """
function() {
    setTimeout(fullTextSearch(), 100);

    // Get all links and attach click event
    var links = document.getElementsByClassName("pdf-link");
    for (var i = 0; i < links.length; i++) {
        links[i].onclick = openModal;
    }

    // Get all citation links and attach click event
    var links = document.querySelectorAll("a.citation");
    for (var i = 0; i < links.length; i++) {
        links[i].onclick = scrollToCitation;
    }

    var markmap_div = document.querySelector("div.markmap");
    var mindmap_el_script = document.querySelector('div.markmap script');

    if (mindmap_el_script) {
        markmap_div_html = markmap_div.outerHTML;
    }

    // render the mindmap if the script tag is present
    if (mindmap_el_script) {
        markmap.autoLoader.renderAll();
    }

    setTimeout(() => {
        var mindmap_el = document.querySelector('svg.markmap');

        var text_nodes = document.querySelectorAll("svg.markmap div");
        for (var i = 0; i < text_nodes.length; i++) {
            text_nodes[i].onclick = fillChatInput;
        }

        if (mindmap_el) {
            function on_svg_export(event) {
                html = "{html_template}";
                html = html.replace("{markmap_div}", markmap_div_html);
                spawnDocument(html, {window: "width=1000,height=1000"});
            }

            var link = document.getElementById("mindmap-toggle");
            if (link) {
                link.onclick = function(event) {
                    event.preventDefault(); // Prevent the default link behavior
                    var div = document.querySelector("div.markmap");
                    if (div) {
                        var currentHeight = div.style.height;
                        if (currentHeight === '400px' || (currentHeight === '')) {
                            div.style.height = '650px';
                        } else {
                            div.style.height = '400px'
                        }
                    }
                };
            }

            if (markmap_div_html) {
                var link = document.getElementById("mindmap-export");
                if (link) {
                    link.addEventListener('click', on_svg_export);
                }
            }
        }
    }, 250);

    return [links.length]
}
""".replace(
    "{html_template}",
    MINDMAP_HTML_EXPORT_TEMPLATE.replace("\n", "").replace('"', '\\"'),
)

# Browser-side JS: reads 'google_api_key' through the page's getStorage helper
# and returns it together with the first argument it was called with.
# NOTE(review): the key is also written to the browser console via console.log;
# confirm that logging a credential there is acceptable.
fetch_api_key_js = """
function(_, __) {
    api_key = getStorage('google_api_key', '');
    console.log('session API key:', api_key);
    return [api_key, _];
}
"""


class ChatPage(BasePage):
    def __init__(self, app):
        """Build the chat page layout, then create its extra state components.

        Args:
            app: Application object; ``on_building_ui`` reads its
                ``index_manager`` and ``default_settings``.
        """
        self._app = app
        # Gradio components contributed by the indices' selector UIs, appended in
        # order by on_building_ui
        self._indices_input = []

        self.on_building_ui()

        # State components created after the visible layout
        self._preview_links = gr.State(value=None)
        self._reasoning_type = gr.State(value=None)
        self._conversation_renamed = gr.State(value=False)
        # Initial value comes from the KH_FEATURE_CHAT_SUGGESTION setting
        self._use_suggestion = gr.State(
            value=getattr(flowsettings, "KH_FEATURE_CHAT_SUGGESTION", False)
        )
        self._info_panel_expanded = gr.State(value=True)
        self._command_state = gr.State(value=None)
        # Invisible text component rather than a gr.State
        self._user_api_key = gr.Text(value="", visible=False)

    def on_building_ui(self):
        """Lay out the chat page as a single row of three columns.

        The columns are: a settings column (conversation control, one accordion
        per index selector, chat suggestions, quick upload, and either the
        issue report or the demo-mode extras), the chat area (chat panel plus
        the "Chat settings" accordion), and the information panel.
        """
        with gr.Row():
            self.state_chat = gr.State(STATE)
            self.state_retrieval_history = gr.State([])
            self.state_plot_history = gr.State([])
            self.state_plot_panel = gr.State(None)
            self.first_selector_choices = gr.State(None)

            with gr.Column(scale=1, elem_id="conv-settings-panel") as self.conv_column:
                self.chat_control = ConversationControl(self._app)

                for index_id, index in enumerate(self._app.index_manager.indices):
                    index.selector = None
                    index_ui = index.get_selector_component_ui()
                    if not index_ui:
                        # the index doesn't have a selector UI component
                        continue

                    index_ui.unrender()  # need to rerender later within Accordion
                    is_first_index = index_id == 0
                    index_name = index.name

                    if KH_DEMO_MODE and is_first_index:
                        index_name = "Select from Paper Collection"

                    # Only the first index's accordion starts open
                    with gr.Accordion(
                        label=index_name,
                        open=is_first_index,
                        elem_id=f"index-{index_id}",
                    ):
                        index_ui.render()
                        gr_index = index_ui.as_gradio_component()

                        # get the file selector choices for the first index
                        if index_id == 0:
                            self.first_selector_choices = index_ui.selector_choices
                            self.first_indexing_url_fn = None

                        if gr_index:
                            # index.selector records where this index's component(s)
                            # sit inside self._indices_input: a tuple of positions
                            # for a list of components, a single position otherwise
                            if isinstance(gr_index, list):
                                index.selector = tuple(
                                    range(
                                        len(self._indices_input),
                                        len(self._indices_input) + len(gr_index),
                                    )
                                )
                                index.default_selector = index_ui.default()
                                self._indices_input.extend(gr_index)
                            else:
                                index.selector = len(self._indices_input)
                                index.default_selector = index_ui.default()
                                self._indices_input.append(gr_index)
                        # Keep the selector UI reachable as self._index_<id>
                        setattr(self, f"_index_{index.id}", index_ui)

                self.chat_suggestion = ChatSuggestion(self._app)

                # Quick upload widgets exist only when at least one index is present
                if len(self._app.index_manager.indices) > 0:
                    quick_upload_label = (
                        "Quick Upload" if not KH_DEMO_MODE else "Or input new paper URL"
                    )

                    with gr.Accordion(label=quick_upload_label) as _:
                        self.quick_file_upload_status = gr.Markdown()
                        # File upload is not offered in demo mode; only URLs are
                        if not KH_DEMO_MODE:
                            self.quick_file_upload = File(
                                file_types=list(KH_DEFAULT_FILE_EXTRACTORS.keys()),
                                file_count="multiple",
                                container=True,
                                show_label=False,
                                elem_id="quick-file",
                            )
                        self.quick_urls = gr.Textbox(
                            placeholder=(
                                "Or paste URLs"
                                if not KH_DEMO_MODE
                                else "Paste Arxiv URLs\n(https://arxiv.org/abs/xxx)"
                            ),
                            lines=1,
                            container=False,
                            show_label=False,
                            elem_id=(
                                "quick-url" if not KH_DEMO_MODE else "quick-url-demo"
                            ),
                        )

                if not KH_DEMO_MODE:
                    self.report_issue = ReportIssue(self._app)
                else:
                    with gr.Accordion(label="Related papers", open=False):
                        self.related_papers = gr.Markdown(elem_id="related-papers")

                    self.hint_page = HintPage(self._app)

            with gr.Column(scale=6, elem_id="chat-area"):
                if KH_DEMO_MODE:
                    self.paper_list = PaperListPage(self._app)

                self.chat_panel = ChatPanel(self._app)

                with gr.Accordion(
                    label="Chat settings",
                    elem_id="chat-settings-expand",
                    open=False,
                    visible=not KH_DEMO_MODE,
                ) as self.chat_settings:
                    with gr.Row(elem_id="quick-setting-labels"):
                        gr.HTML("Reasoning method")
                        gr.HTML(
                            "Model", visible=not KH_DEMO_MODE and not KH_SSO_ENABLED
                        )
                        gr.HTML("Language")

                    with gr.Row():
                        # Choices and initial values come from the app's default
                        # reasoning settings ("simple" option for model/citation)
                        reasoning_setting = (
                            self._app.default_settings.reasoning.settings["use"]
                        )
                        model_setting = self._app.default_settings.reasoning.options[
                            "simple"
                        ].settings["llm"]
                        language_setting = (
                            self._app.default_settings.reasoning.settings["lang"]
                        )
                        citation_setting = self._app.default_settings.reasoning.options[
                            "simple"
                        ].settings["highlight_citation"]

                        # Only the first REASONING_LIMITS choices are offered
                        self.reasoning_type = gr.Dropdown(
                            choices=reasoning_setting.choices[:REASONING_LIMITS],
                            value=reasoning_setting.value,
                            container=False,
                            show_label=False,
                        )
                        self.model_type = gr.Dropdown(
                            choices=model_setting.choices,
                            value=model_setting.value,
                            container=False,
                            show_label=False,
                            visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,
                        )
                        self.language = gr.Dropdown(
                            choices=language_setting.choices,
                            value=language_setting.value,
                            container=False,
                            show_label=False,
                        )

                        self.citation = gr.Dropdown(
                            choices=citation_setting.choices,
                            value=citation_setting.value,
                            container=False,
                            show_label=False,
                            interactive=True,
                            elem_id="citation-dropdown",
                        )

                        # Mindmap starts enabled unless USE_LOW_LLM_REQUESTS is set
                        if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
                            self.use_mindmap = gr.State(value=True)
                            self.use_mindmap_check = gr.Checkbox(
                                label="Mindmap (on)",
                                container=False,
                                elem_id="use-mindmap-checkbox",
                                value=True,
                            )
                        else:
                            self.use_mindmap = gr.State(value=False)
                            self.use_mindmap_check = gr.Checkbox(
                                label="Mindmap (off)",
                                container=False,
                                elem_id="use-mindmap-checkbox",
                                value=False,
                            )

            with gr.Column(
                scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
            ) as self.info_column:
                with gr.Accordion(
                    label="Information panel", open=True, elem_id="info-expand"
                ):
                    self.modal = gr.HTML("<div id='pdf-modal'></div>")
                    self.plot_panel = gr.Plot(visible=False)
                    self.info_panel = gr.HTML(elem_id="html-info-panel")

        # Shortcuts to components owned by the chat suggestion sub-page
        self.followup_questions = self.chat_suggestion.examples
        self.followup_questions_ui = self.chat_suggestion.accordion

    def _json_to_plot(self, json_dict: dict | None):
        """Turn stored plot JSON into an update for the plot component.

        Args:
            json_dict: Plot data accepted by ``plotly.io.from_json``, or a falsy
                value when there is nothing to draw.

        Returns:
            An update that shows the rebuilt figure, or one that hides the
            component when ``json_dict`` is falsy.
        """
        if not json_dict:
            return gr.update(visible=False)

        figure = from_json(json_dict)
        return gr.update(visible=True, value=figure)

    def on_register_events(self):
        # first index paper recommendation
        if KH_DEMO_MODE and len(self._indices_input) > 0:
            self._indices_input[1].change(
                self.get_recommendations,
                inputs=[self.first_selector_choices, self._indices_input[1]],
                outputs=[self.related_papers],
            ).then(
                fn=None,
                inputs=None,
                outputs=None,
                js=recommended_papers_js,
            )

        chat_event = (
            gr.on(
                triggers=[
                    self.chat_panel.text_input.submit,
                ],
                fn=self.submit_msg,
                inputs=[
                    self.chat_panel.text_input,
                    self.chat_panel.chatbot,
                    self._app.user_id,
                    self._app.settings_state,
                    self.chat_control.conversation_id,
                    self.chat_control.conversation_rn,
                    self.first_selector_choices,
                ],
                outputs=[
                    self.chat_panel.text_input,
                    self.chat_panel.chatbot,
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                    # file selector from the first index
                    self._indices_input[0],
                    self._indices_input[1],
                    self._command_state,
                ],
                concurrency_limit=20,
                show_progress="hidden",
            )
            .success(
                fn=self.chat_fn,
                inputs=[
                    self.chat_control.conversation_id,
                    self.chat_panel.chatbot,
                    self._app.settings_state,
                    self._reasoning_type,
                    self.model_type,
                    self.use_mindmap,
                    self.citation,
                    self.language,
                    self.state_chat,
                    self._command_state,
                    self._app.user_id,
                ]
                + self._indices_input,
                outputs=[
                    self.chat_panel.chatbot,
                    self.info_panel,
                    self.plot_panel,
                    self.state_plot_panel,
                    self.state_chat,
                ],
                concurrency_limit=20,
                show_progress="minimal",
            )
            .then(
                fn=lambda: True,
                inputs=None,
                outputs=[self._preview_links],
                js=pdfview_js,
            )
            .success(
                fn=self.check_and_suggest_name_conv,
                inputs=self.chat_panel.chatbot,
                outputs=[
                    self.chat_control.conversation_rn,
                    self._conversation_renamed,
                ],
            )
            .success(
                self.chat_control.rename_conv,
                inputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation_rn,
                    self._conversation_renamed,
                    self._app.user_id,
                ],
                outputs=[
                    self.chat_control.conversation,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                ],
                show_progress="hidden",
            )
        )

        onSuggestChatEvent = {
            "fn": self.suggest_chat_conv,
            "inputs": [
                self._app.settings_state,
                self.language,
                self.chat_panel.chatbot,
                self._use_suggestion,
            ],
            "outputs": [
                self.followup_questions_ui,
                self.followup_questions,
            ],
            "show_progress": "hidden",
        }
        # chat suggestion toggle
        chat_event = chat_event.success(**onSuggestChatEvent)

        # final data persist
        if not KH_DEMO_MODE:
            chat_event = chat_event.then(
                fn=self.persist_data_source,
                inputs=[
                    self.chat_control.conversation_id,
                    self._app.user_id,
                    self.info_panel,
                    self.state_plot_panel,
                    self.state_retrieval_history,
                    self.state_plot_history,
                    self.chat_panel.chatbot,
                    self.state_chat,
                ]
                + self._indices_input,
                outputs=[
                    self.state_retrieval_history,
                    self.state_plot_history,
                ],
                concurrency_limit=20,
            )

        self.chat_control.btn_info_expand.click(
            fn=lambda is_expanded: (
                gr.update(scale=INFO_PANEL_SCALES[is_expanded]),
                not is_expanded,
            ),
            inputs=self._info_panel_expanded,
            outputs=[self.info_column, self._info_panel_expanded],
        )
        self.chat_control.btn_chat_expand.click(
            fn=None, inputs=None, js="function() {toggleChatColumn();}"
        )

        if KH_DEMO_MODE:
            self.chat_control.btn_demo_logout.click(
                fn=None,
                js=self.chat_control.logout_js,
            )
            self.chat_control.btn_new.click(
                fn=lambda: self.chat_control.select_conv("", None),
                outputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                    self.chat_panel.chatbot,
                    self.followup_questions,
                    self.info_panel,
                    self.state_plot_panel,
                    self.state_retrieval_history,
                    self.state_plot_history,
                    self.chat_control.cb_is_public,
                    self.state_chat,
                ]
                + self._indices_input,
            ).then(
                lambda: (gr.update(visible=False), gr.update(visible=True)),
                outputs=[self.paper_list.accordion, self.chat_settings],
            ).then(
                fn=None,
                inputs=None,
                js=chat_input_focus_js,
            )

        if not KH_DEMO_MODE:
            self.chat_control.btn_new.click(
                self.chat_control.new_conv,
                inputs=self._app.user_id,
                outputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                ],
                show_progress="hidden",
            ).then(
                self.chat_control.select_conv,
                inputs=[self.chat_control.conversation, self._app.user_id],
                outputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                    self.chat_panel.chatbot,
                    self.followup_questions,
                    self.info_panel,
                    self.state_plot_panel,
                    self.state_retrieval_history,
                    self.state_plot_history,
                    self.chat_control.cb_is_public,
                    self.state_chat,
                ]
                + self._indices_input,
                show_progress="hidden",
            ).then(
                fn=self._json_to_plot,
                inputs=self.state_plot_panel,
                outputs=self.plot_panel,
            ).then(
                fn=None,
                inputs=None,
                js=chat_input_focus_js,
            )

            self.chat_control.btn_del.click(
                lambda id: self.toggle_delete(id),
                inputs=[self.chat_control.conversation_id],
                outputs=[
                    self.chat_control._new_delete,
                    self.chat_control._delete_confirm,
                ],
            )
            self.chat_control.btn_del_conf.click(
                self.chat_control.delete_conv,
                inputs=[self.chat_control.conversation_id, self._app.user_id],
                outputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                ],
                show_progress="hidden",
            ).then(
                self.chat_control.select_conv,
                inputs=[self.chat_control.conversation, self._app.user_id],
                outputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                    self.chat_panel.chatbot,
                    self.followup_questions,
                    self.info_panel,
                    self.state_plot_panel,
                    self.state_retrieval_history,
                    self.state_plot_history,
                    self.chat_control.cb_is_public,
                    self.state_chat,
                ]
                + self._indices_input,
                show_progress="hidden",
            ).then(
                fn=self._json_to_plot,
                inputs=self.state_plot_panel,
                outputs=self.plot_panel,
            ).then(
                lambda: self.toggle_delete(""),
                outputs=[
                    self.chat_control._new_delete,
                    self.chat_control._delete_confirm,
                ],
            )
            self.chat_control.btn_del_cnl.click(
                lambda: self.toggle_delete(""),
                outputs=[
                    self.chat_control._new_delete,
                    self.chat_control._delete_confirm,
                ],
            )
            self.chat_control.btn_conversation_rn.click(
                lambda: gr.update(visible=True),
                outputs=[
                    self.chat_control.conversation_rn,
                ],
            )
            self.chat_control.conversation_rn.submit(
                self.chat_control.rename_conv,
                inputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation_rn,
                    gr.State(value=True),
                    self._app.user_id,
                ],
                outputs=[
                    self.chat_control.conversation,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                ],
                show_progress="hidden",
            )

        onConvSelect = (
            self.chat_control.conversation.select(
                self.chat_control.select_conv,
                inputs=[self.chat_control.conversation, self._app.user_id],
                outputs=[
                    self.chat_control.conversation_id,
                    self.chat_control.conversation,
                    self.chat_control.conversation_rn,
                    self.chat_panel.chatbot,
                    self.followup_questions,
                    self.info_panel,
                    self.state_plot_panel,
                    self.state_retrieval_history,
                    self.state_plot_history,
                    self.chat_control.cb_is_public,
                    self.state_chat,
                ]
                + self._indices_input,
                show_progress="hidden",
            )
            .then(
                fn=self._json_to_plot,
                inputs=self.state_plot_panel,
                outputs=self.plot_panel,
            )
            .then(
                lambda: self.toggle_delete(""),
                outputs=[
                    self.chat_control._new_delete,
                    self.chat_control._delete_confirm,
                ],
            )
        )

        if KH_DEMO_MODE:
            onConvSelect = onConvSelect.then(
                lambda: (gr.update(visible=False), gr.update(visible=True)),
                outputs=[self.paper_list.accordion, self.chat_settings],
            )

        onConvSelect = (
            onConvSelect.then(
                fn=lambda: True,
                js=clear_bot_message_selection_js,
            )
            .then(
                fn=lambda: True,
                inputs=None,
                outputs=[self._preview_links],
                js=pdfview_js,
            )
            .then(fn=None, inputs=None, outputs=None, js=chat_input_focus_js)
        )

        if not KH_DEMO_MODE:
            # evidence display on message selection
            self.chat_panel.chatbot.select(
                self.message_selected,
                inputs=[
                    self.state_retrieval_history,
                    self.state_plot_history,
                ],
                outputs=[
                    self.info_panel,
                    self.state_plot_panel,
                ],
            ).then(
                fn=self._json_to_plot,
                inputs=self.state_plot_panel,
                outputs=self.plot_panel,
            ).then(
                fn=lambda: True,
                inputs=None,
                outputs=[self._preview_links],
                js=pdfview_js,
            )

        self.chat_control.cb_is_public.change(
            self.on_set_public_conversation,
            inputs=[self.chat_control.cb_is_public, self.chat_control.conversation],
            outputs=None,
            show_progress="hidden",
        )

        if not KH_DEMO_MODE:
            # user feedback events
            self.chat_panel.chatbot.like(
                fn=self.is_liked,
                inputs=[self.chat_control.conversation_id],
                outputs=None,
            )
            self.report_issue.report_btn.click(
                self.report_issue.report,
                inputs=[
                    self.report_issue.correctness,
                    self.report_issue.issues,
                    self.report_issue.more_detail,
                    self.chat_control.conversation_id,
                    self.chat_panel.chatbot,
                    self._app.settings_state,
                    self._app.user_id,
                    self.info_panel,
                    self.state_chat,
                ]
                + self._indices_input,
                outputs=None,
            )

        self.reasoning_type.change(
            self.reasoning_changed,
            inputs=[self.reasoning_type],
            outputs=[self._reasoning_type],
        )
        self.use_mindmap_check.change(
            lambda x: (x, gr.update(label="Mindmap " + ("(on)" if x else "(off)"))),
            inputs=[self.use_mindmap_check],
            outputs=[self.use_mindmap, self.use_mindmap_check],
            show_progress="hidden",
        )

        def toggle_chat_suggestion(current_state):
            return current_state, gr.update(visible=current_state)

        def raise_error_on_state(state):
            if not state:
                raise ValueError("Chat suggestion disabled")

        self.chat_control.cb_suggest_chat.change(
            fn=toggle_chat_suggestion,
            inputs=[self.chat_control.cb_suggest_chat],
            outputs=[self._use_suggestion, self.followup_questions_ui],
            show_progress="hidden",
        ).then(
            fn=raise_error_on_state,
            inputs=[self._use_suggestion],
            show_progress="hidden",
        ).success(
            **onSuggestChatEvent
        )
        self.chat_control.conversation_id.change(
            lambda: gr.update(visible=False),
            outputs=self.plot_panel,
        )

        self.followup_questions.select(
            self.chat_suggestion.select_example,
            outputs=[self.chat_panel.text_input],
            show_progress="hidden",
        ).then(
            fn=None,
            inputs=None,
            outputs=None,
            js=chat_input_focus_js,
        )

        if KH_DEMO_MODE:
            self.paper_list.examples.select(
                self.paper_list.select_example,
                inputs=[self.paper_list.papers_state],
                outputs=[self.quick_urls],
                show_progress="hidden",
            ).then(
                lambda: (gr.update(visible=False), gr.update(visible=True)),
                outputs=[self.paper_list.accordion, self.chat_settings],
            ).then(
                fn=None,
                inputs=None,
                outputs=None,
                js=quick_urls_submit_js,
            )

    def submit_msg(
        self,
        chat_input,
        chat_history,
        user_id,
        settings,
        conv_id,
        conv_name,
        first_selector_choices,
        request: gr.Request,
    ):
        """Pre-process a submitted chat message before the answer is generated.

        Extracts ``@"filename"`` tags and URLs from the message text, resolves
        them to file ids (indexing the URLs when ``first_indexing_url_fn`` is
        set), appends the new turn to the chat history and makes sure there is
        a conversation to attach the turn to.

        Args:
            chat_input: mapping holding the submitted message under ``"text"``.
            chat_history: list of ``(user_message, bot_message)`` turns.
            user_id: id of the current user.
            settings: app settings, forwarded to the URL indexing function.
            conv_id: id of the active conversation; falsy when there is none.
            conv_name: name of the active conversation.
            first_selector_choices: list of ``(label, file_id)`` pairs. It is
                extended in place with the URLs indexed by this call.
            request: the Gradio request, used for rate limiting in demo mode.

        Returns:
            A list of eight items: an empty dict, the updated chat history,
            the conversation id, an update for the conversation list, the
            conversation name, two selector updates (``"select"`` plus an
            update carrying the file ids and choices when files were
            resolved, otherwise two no-op updates) and the detected command
            (``WEB_SEARCH_COMMAND`` or ``None``).

        Raises:
            ValueError: if ``chat_input`` is empty.
            gr.Error: if there is neither message text nor previous history.
        """
        if KH_DEMO_MODE:
            sso_user_id = check_rate_limit("chat", request)
            print("User ID:", sso_user_id)

        if not chat_input:
            raise ValueError("Input is empty")

        chat_input_text = chat_input.get("text", "")
        file_ids = []
        used_command = None

        # snapshot of label -> file id taken before any new choice is added
        first_selector_choices_map = {
            item[0]: item[1] for item in first_selector_choices
        }

        # get all file names with pattern @"filename" in input_str
        file_names, chat_input_text = get_file_names_regex(chat_input_text)

        # check if web search command is in file_names
        if WEB_SEARCH_COMMAND in file_names:
            used_command = WEB_SEARCH_COMMAND

        # get all urls in input_str
        urls, chat_input_text = get_urls(chat_input_text)

        if urls and self.first_indexing_url_fn:
            print("Detected URLs", urls)
            file_ids = self.first_indexing_url_fn(
                "\n".join(urls),
                True,
                settings,
                user_id,
                request=None,
            )
            # Register the freshly indexed URLs as selector choices. This is
            # done only here: outside this branch `file_ids` comes from the
            # tagged file names and must not be paired with the URLs.
            first_selector_choices.extend(zip(urls, file_ids))
        elif file_names:
            for file_name in file_names:
                file_id = first_selector_choices_map.get(file_name)
                if file_id:
                    file_ids.append(file_id)

        # Fall back to the default question when no text is left and either
        # files were resolved or this is the start of the conversation.
        if not chat_input_text and (file_ids or not chat_history):
            chat_input_text = DEFAULT_QUESTION

        if file_ids:
            selector_output = [
                "select",
                gr.update(value=file_ids, choices=first_selector_choices),
            ]
        else:
            selector_output = [gr.update(), gr.update()]

        # No text at this point means regen mode: the history is left as is
        # and must therefore already contain a turn.
        if chat_input_text:
            chat_history = chat_history + [(chat_input_text, None)]
        elif not chat_history:
            raise gr.Error("Empty chat")

        if conv_id:
            new_conv_id = conv_id
            conv_update = gr.update()
            new_conv_name = conv_name
        elif KH_DEMO_MODE:
            # demo mode does not create a conversation
            new_conv_id, new_conv_name, conv_update = None, None, gr.update()
        else:
            # create a conversation on the fly and read back its name
            new_conv_id, conv_update = self.chat_control.new_conv(user_id)
            with Session(engine) as session:
                statement = select(Conversation).where(
                    Conversation.id == new_conv_id
                )
                new_conv_name = session.exec(statement).one().name

        return (
            [
                {},
                chat_history,
                new_conv_id,
                conv_update,
                new_conv_name,
            ]
            + selector_output
            + [used_command]
        )

    def get_recommendations(self, first_selector_choices, file_ids):
        """Return paper recommendations derived from the first selected file.

        Args:
            first_selector_choices: iterable of ``(file_name, file_id)`` pairs.
            file_ids: ids of the currently selected files.

        Returns:
            The result of ``get_recommended_papers`` for the first resolvable
            file name (text before the first ``.``, underscores replaced by
            spaces), or ``""`` when no selected id maps to a known file.
        """
        if not file_ids:
            return ""

        id_to_name = {item[1]: item[0] for item in first_selector_choices}
        # Skip ids that are not among the choices instead of raising KeyError.
        file_names = [
            id_to_name[file_id] for file_id in file_ids if file_id in id_to_name
        ]
        if not file_names:
            return ""

        first_file_name = file_names[0].split(".")[0].replace("_", " ")
        return get_recommended_papers(first_file_name)

    def toggle_delete(self, conv_id):
        """Return two complementary visibility updates.

        With a truthy ``conv_id`` the first update hides and the second one
        shows; with a falsy ``conv_id`` it is the other way round.
        """
        confirming = bool(conv_id)
        return gr.update(visible=not confirming), gr.update(visible=confirming)

    def on_set_public_conversation(self, is_public, convo_id):
        """Store the public/private flag of a conversation.

        Warns and does nothing when ``convo_id`` is falsy. The record is only
        written, and the user only notified, when the requested flag differs
        from the stored one.
        """
        if not convo_id:
            gr.Warning("No conversation selected")
            return

        with Session(engine) as session:
            query = select(Conversation).where(Conversation.id == convo_id)
            conversation = session.exec(query).one()
            conv_name = conversation.name

            # nothing to do when the stored flag already matches the request
            if conversation.is_public == is_public:
                return

            conversation.is_public = is_public
            session.add(conversation)
            session.commit()

            visibility = "public" if is_public else "private"
            gr.Info(f"Conversation: {conv_name} is {visibility}.")

    def on_subscribe_public_events(self):
        """Subscribe to the app's sign-in/sign-out events.

        Nothing is registered unless ``self._app.f_user_management`` is truthy.
        """
        if not self._app.f_user_management:
            return

        # sign-in: call `chat_control.reload_conv` with the current user id
        sign_in_definition = {
            "fn": self.chat_control.reload_conv,
            "inputs": [self._app.user_id],
            "outputs": [self.chat_control.conversation],
            "show_progress": "hidden",
        }
        self._app.subscribe_event(name="onSignIn", definition=sign_in_definition)

        # sign-out: select the empty conversation ("" with no user)
        sign_out_outputs = [
            self.chat_control.conversation_id,
            self.chat_control.conversation,
            self.chat_control.conversation_rn,
            self.chat_panel.chatbot,
            self.followup_questions,
            self.info_panel,
            self.state_plot_panel,
            self.state_retrieval_history,
            self.state_plot_history,
            self.chat_control.cb_is_public,
            self.state_chat,
        ] + self._indices_input
        sign_out_definition = {
            "fn": lambda: self.chat_control.select_conv("", None),
            "outputs": sign_out_outputs,
            "show_progress": "hidden",
        }
        self._app.subscribe_event(name="onSignOut", definition=sign_out_definition)

    def _on_app_created(self):
        """Register the demo-mode page-load chain.

        Outside demo mode nothing is registered. In demo mode the chain runs
        ``fetch_api_key_js`` into ``_user_api_key``, passes that value to
        ``chat_control.toggle_demo_login_visibility`` and finally runs
        ``chat_input_focus_js``.
        """
        if not KH_DEMO_MODE:
            return

        api_key_loaded = self._app.app.load(
            fn=lambda api_key: api_key,
            inputs=[self._user_api_key],
            outputs=[self._user_api_key],
            js=fetch_api_key_js,
        )
        login_toggled = api_key_loaded.then(
            fn=self.chat_control.toggle_demo_login_visibility,
            inputs=[self._user_api_key],
            outputs=[
                self.chat_control.cb_suggest_chat,
                self.chat_control.btn_new,
                self.chat_control.btn_demo_logout,
                self.chat_control.btn_demo_login,
            ],
        )
        login_toggled.then(
            fn=None,
            inputs=None,
            js=chat_input_focus_js,
        )

    def persist_data_source(
        self,
        convo_id,
        user_id,
        retrieval_msg,
        plot_data,
        retrival_history,
        plot_history,
        messages,
        state,
        *selecteds,
    ):
        """Write the latest chat turn and its evidence to the conversation record.

        Args:
            convo_id: id of the conversation to update.
            user_id: id of the current user; the stored file selection is only
                overwritten when this user owns the conversation.
            retrieval_msg: evidence content produced for the latest turn.
            plot_data: plot content produced for the latest turn.
            retrival_history: per-turn list of evidence content.
            plot_history: per-turn list of plot content.
            messages: the chat history to store.
            state: chat state dict; ``state["app"]["regen"]`` tells whether the
                latest turn was a regeneration and is reset to ``False`` here.
            *selecteds: current selector values, addressed through each
                index's ``selector`` position(s).

        Returns:
            ``(retrival_history, plot_history)`` after the update. When no
            conversation is selected both are returned unchanged.
        """
        if not convo_id:
            gr.Warning("No conversation selected")
            # The handler is wired to two outputs, so hand the histories back
            # untouched rather than returning a bare None.
            return retrival_history, plot_history

        if not state["app"].get("regen", False):
            # regular turn: append the new evidence
            retrival_history = retrival_history + [retrieval_msg]
            plot_history = plot_history + [plot_data]
        elif retrival_history:
            # regenerated turn: overwrite the evidence of the last turn
            print("Updating retrieval history (regen=True)")
            retrival_history[-1] = retrieval_msg
            # The two histories are not guaranteed to be equally long; skip
            # the overwrite instead of raising IndexError on an empty list.
            if plot_history:
                plot_history[-1] = plot_data

        # reset regen state
        state["app"]["regen"] = False

        # collect the selector values of every index that has a selector
        selecteds_ = {}
        for index in self._app.index_manager.indices:
            if index.selector is None:
                continue
            if isinstance(index.selector, int):
                selecteds_[str(index.id)] = selecteds[index.selector]
            else:
                selecteds_[str(index.id)] = [selecteds[i] for i in index.selector]

        with Session(engine) as session:
            statement = select(Conversation).where(Conversation.id == convo_id)
            result = session.exec(statement).one()

            data_source = result.data_source
            old_selecteds = data_source.get("selected", {})
            is_owner = result.user == user_id

            # Write down to db; non-owners keep the previously stored selection
            result.data_source = {
                "selected": selecteds_ if is_owner else old_selecteds,
                "messages": messages,
                "retrieval_messages": retrival_history,
                "plot_history": plot_history,
                "state": state,
                "likes": deepcopy(data_source.get("likes", [])),
            }
            session.add(result)
            session.commit()

        return retrival_history, plot_history

    def reasoning_changed(self, reasoning_type):
        """Return ``reasoning_type`` unchanged, notifying on non-default values."""
        is_default = reasoning_type == DEFAULT_SETTING
        if not is_default:
            # override app settings state (temporary)
            gr.Info(f"Reasoning type changed to `{reasoning_type}`")
        return reasoning_type

    def is_liked(self, convo_id, liked: gr.LikeData):
        """Append a like event to the ``"likes"`` list of a conversation.

        Each stored entry is ``[liked.index, liked.value, liked.liked]``.
        """
        with Session(engine) as session:
            query = select(Conversation).where(Conversation.id == convo_id)
            conversation = session.exec(query).one()

            # work on a copy and assign it back as a whole
            updated_source = deepcopy(conversation.data_source)
            feedback = [liked.index, liked.value, liked.liked]
            updated_source.setdefault("likes", []).append(feedback)

            conversation.data_source = updated_source
            session.add(conversation)
            session.commit()

    def message_selected(self, retrieval_history, plot_history, msg: gr.SelectData):
        """Look up the evidence and plot stored for the selected message.

        The position is ``msg.index[0]``. When either history has no entry at
        that position, a no-op update and ``None`` are returned instead.
        """
        position = msg.index[0]
        try:
            retrieval_content = retrieval_history[position]
            plot_content = plot_history[position]
        except IndexError:
            return gr.update(), None

        return retrieval_content, plot_content

    def create_pipeline(
        self,
        settings: dict,
        session_reasoning_type: str,
        session_llm: str,
        session_use_mindmap: bool | str,
        session_use_citation: str,
        session_language: str,
        state: dict,
        command_state: str | None,
        user_id: int,
        *selecteds,
    ):
        """Build the reasoning pipeline for one chat turn.

        Session-level choices override the matching entries in a copy of
        ``settings`` unless they are ``DEFAULT_SETTING`` or ``None``.

        Args:
            settings: the settings of the app (copied, never modified).
            session_reasoning_type: reasoning mode chosen on the chat page.
            session_llm: LLM chosen on the chat page; an empty string also
                counts as "not chosen".
            session_use_mindmap: mindmap choice made on the chat page.
            session_use_citation: citation choice made on the chat page.
            session_language: language chosen on the chat page.
            state: the chat state; its ``"app"`` entry and the entry keyed by
                the reasoning id are copied into the returned state.
            command_state: ``WEB_SEARCH_COMMAND`` to retrieve from web search
                instead of the indices.
            user_id: id of the current user, passed to the indices.
            *selecteds: selector values, addressed through each index's
                ``selector`` position(s).

        Returns:
            ``(pipeline, reasoning_state)``.

        Raises:
            ValueError: if web search is requested but ``WebSearch`` is falsy.
        """
        unset_values = (DEFAULT_SETTING, None)

        # override reasoning_mode by temporary chat page state
        print(
            "Session reasoning type",
            session_reasoning_type,
            "use mindmap",
            session_use_mindmap,
            "use citation",
            session_use_citation,
            "language",
            session_language,
        )
        print("Session LLM", session_llm)
        if session_reasoning_type in unset_values:
            reasoning_mode = settings["reasoning.use"]
        else:
            reasoning_mode = session_reasoning_type
        reasoning_cls = reasonings[reasoning_mode]
        print("Reasoning class", reasoning_cls)
        reasoning_id = reasoning_cls.get_info()["id"]

        settings = deepcopy(settings)

        # the LLM override only applies when the reasoning exposes that option
        llm_setting_key = f"reasoning.options.{reasoning_id}.llm"
        if llm_setting_key in settings and session_llm not in (
            DEFAULT_SETTING,
            None,
            "",
        ):
            settings[llm_setting_key] = session_llm

        session_overrides = (
            ("reasoning.options.simple.create_mindmap", session_use_mindmap),
            ("reasoning.options.simple.highlight_citation", session_use_citation),
            ("reasoning.lang", session_language),
        )
        for setting_key, session_value in session_overrides:
            if session_value not in unset_values:
                settings[setting_key] = session_value

        # get retrievers
        retrievers = []
        if command_state == WEB_SEARCH_COMMAND:
            # set retriever for web search
            if not WebSearch:
                raise ValueError("Web search back-end is not available.")
            retrievers.append(WebSearch())
        else:
            for index in self._app.index_manager.indices:
                selector = index.selector
                if isinstance(selector, int):
                    index_selected = selecteds[selector]
                elif isinstance(selector, tuple):
                    index_selected = [selecteds[i] for i in selector]
                else:
                    index_selected = []
                retrievers.extend(
                    index.get_retriever_pipelines(settings, user_id, index_selected)
                )

        # prepare states
        reasoning_state = {
            "app": deepcopy(state["app"]),
            "pipeline": deepcopy(state.get(reasoning_id, {})),
        }

        pipeline = reasoning_cls.get_pipeline(settings, reasoning_state, retrievers)
        return pipeline, reasoning_state

    def chat_fn(
        self,
        conversation_id,
        chat_history,
        settings,
        reasoning_type,
        llm_type,
        use_mind_map,
        use_citation,
        language,
        chat_state,
        command_state,
        user_id,
        *selecteds,
    ):
        """Stream the answer to the last turn of the chat history.

        This is a generator. The last ``(user_message, bot_message)`` pair of
        ``chat_history`` is the turn to answer; the pairs before it are passed
        to the pipeline as history.

        Args:
            conversation_id: id of the conversation, passed to the pipeline.
            chat_history: list of ``(user_message, bot_message)`` pairs; must
                not be empty.
            settings, reasoning_type, llm_type, use_mind_map, use_citation,
                language, command_state, user_id, *selecteds: forwarded
                unchanged to ``create_pipeline``.
            chat_state: chat state dict. It is mutated in place:
                ``["app"]["regen"]`` is set when the last turn already has an
                answer, and the entry keyed by the pipeline id is refreshed
                while streaming.

        Yields:
            5-tuples ``(chat history, info text, plot update, plot content,
            chat_state)``. The first one carries the placeholder message, then
            one follows per accepted pipeline response, and a last one carries
            the empty-answer message if no chat text was produced.
        """
        chat_input, chat_output = chat_history[-1]
        chat_history = chat_history[:-1]

        # if the last turn already has an answer (chat_output), assume regen mode
        if chat_output:
            chat_state["app"]["regen"] = True

        queue: asyncio.Queue[Optional[dict]] = asyncio.Queue()

        # construct the pipeline
        pipeline, reasoning_state = self.create_pipeline(
            settings,
            reasoning_type,
            llm_type,
            use_mind_map,
            use_citation,
            language,
            chat_state,
            command_state,
            user_id,
            *selecteds,
        )
        print("Reasoning state", reasoning_state)
        pipeline.set_output_queue(queue)

        # accumulators for the "chat", "info" and "plot" channels
        text, refs, plot, plot_gr = "", "", None, gr.update(visible=False)
        msg_placeholder = getattr(
            flowsettings, "KH_CHAT_MSG_PLACEHOLDER", "Thinking ..."
        )
        print(msg_placeholder)
        # first yield: show the placeholder before any response arrives
        yield (
            chat_history + [(chat_input, text or msg_placeholder)],
            refs,
            plot_gr,
            plot,
            chat_state,
        )

        try:
            for response in pipeline.stream(chat_input, conversation_id, chat_history):

                # only Document responses that name a channel are used
                if not isinstance(response, Document):
                    continue

                if response.channel is None:
                    continue

                # on "chat" and "info", a None content resets the accumulated
                # text; any other content is appended to it
                if response.channel == "chat":
                    if response.content is None:
                        text = ""
                    else:
                        text += response.content

                if response.channel == "info":
                    if response.content is None:
                        refs = ""
                    else:
                        refs += response.content

                # "plot" replaces the previous plot instead of accumulating
                if response.channel == "plot":
                    plot = response.content
                    plot_gr = self._json_to_plot(plot)

                # keep the pipeline's own state under its id in the chat state
                chat_state[pipeline.get_info()["id"]] = reasoning_state["pipeline"]

                yield (
                    chat_history + [(chat_input, text or msg_placeholder)],
                    refs,
                    plot_gr,
                    plot,
                    chat_state,
                )
        except ValueError as e:
            # NOTE(review): a ValueError raised while streaming is only
            # printed; execution then continues below with what was gathered
            print(e)

        # nothing was produced on the "chat" channel: show the empty message
        if not text:
            empty_msg = getattr(
                flowsettings, "KH_CHAT_EMPTY_MSG_PLACEHOLDER", "(Sorry, I don't know)"
            )
            print(f"Generate nothing: {empty_msg}")
            yield (
                chat_history + [(chat_input, text or empty_msg)],
                refs,
                plot_gr,
                plot,
                chat_state,
            )

    def check_and_suggest_name_conv(self, chat_history):
        """Suggest a conversation name once the history holds exactly one turn.

        Returns:
            ``(name_update, renamed)``. For a one-turn history the update
            carries the suggested name (think tags and quotes removed, cut to
            40 characters) and ``renamed`` is ``True``. Otherwise a no-op
            update and ``False`` are returned.
        """
        name_pipeline = SuggestConvNamePipeline()

        # only a newly created conversation (a single turn) gets a name
        if len(chat_history) != 1:
            return gr.update(), False

        raw_name = name_pipeline(chat_history).text
        unquoted = strip_think_tag(raw_name).replace('"', "").replace("'", "")
        return gr.update(value=unquoted[:40]), True

    def suggest_chat_conv(
        self,
        settings,
        session_language,
        chat_history,
        use_suggestion,
    ):
        """Produce follow-up question suggestions for the current chat.

        Returns:
            ``(panel_update, questions)``. With suggestions disabled the panel
            is hidden and the questions are left untouched. Otherwise the
            panel is shown with the questions parsed from the pipeline's
            answer, or ``ChatSuggestion.CHAT_SAMPLES`` when the history is
            empty or the answer holds no parsable JSON list.
        """
        # the session language wins over the app setting when it is set
        if session_language in (DEFAULT_SETTING, None):
            target_language = settings["reasoning.lang"]
        else:
            target_language = session_language

        if not use_suggestion:
            return gr.update(visible=False), gr.update()

        suggest_pipeline = SuggestFollowupQuesPipeline()
        pipeline_lang = SUPPORTED_LANGUAGE_MAP.get(target_language, "English")
        suggest_pipeline.lang = pipeline_lang
        suggested_questions = [[sample] for sample in ChatSuggestion.CHAT_SAMPLES]

        if len(chat_history) >= 1:
            answer = suggest_pipeline(chat_history).text
            # look for the first [...] span once line breaks are removed
            bracketed = re.search(r"\[(.*?)\]", re.sub("\n", "", answer))
            if bracketed:
                try:
                    parsed = json.loads(bracketed.group())
                    suggested_questions = [[question] for question in parsed]
                except Exception:
                    # keep the sample questions when the span is not valid JSON
                    pass

        return gr.update(visible=True), suggested_questions


================================================
FILE: libs/ktem/ktem/pages/chat/chat_panel.py
================================================
import gradio as gr
from ktem.app import BasePage
from theflow.settings import settings as flowsettings

KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)

# Text shown by the chatbot widget while the conversation is still empty.
if KH_DEMO_MODE:
    PLACEHOLDER_TEXT = (
        "Welcome to Kotaemon Demo. "
        "Start by browsing preloaded conversations to get onboard.\n"
        "Check out Hint section for more tips."
    )
else:
    PLACEHOLDER_TEXT = (
        "This is the beginning of a new conversation.\n"
        "Start by uploading a file or a web URL. "
        "Visit Files tab for more options (e.g: GraphRAG)."
    )


class ChatPanel(BasePage):
    """Chat area made of the conversation display and the message input box."""

    def __init__(self, app):
        """Store the owning app and build the widgets right away."""
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the chatbot display, then the multimodal input inside a row."""
        chatbot_options = {
            "label": self._app.app_name,
            "placeholder": PLACEHOLDER_TEXT,
            "show_label": False,
            "elem_id": "main-chat-bot",
            "show_copy_button": True,
            "likeable": True,
            "bubble_full_width": False,
        }
        self.chatbot = gr.Chatbot(**chatbot_options)

        input_hint = (
            "Type a message, search the @web, or tag a file with @filename"
        )
        with gr.Row():
            self.text_input = gr.MultimodalTextbox(
                elem_id="chat-input",
                placeholder=input_hint,
                file_count="multiple",
                interactive=True,
                container=False,
                show_label=False,
                scale=20,
            )

    def submit_msg(self, chat_input, chat_history):
        """Clear the input and append the message as a turn with no answer yet."""
        pending_turn = (chat_input, None)
        extended_history = chat_history + [pending_turn]
        return "", extended_history


================================================
FILE: libs/ktem/ktem/pages/chat/chat_suggestion.py
================================================
import gradio as gr
from ktem.app import BasePage
from theflow.settings import settings as flowsettings


class ChatSuggestion(BasePage):
    """Accordion listing suggested next questions as a one-column table."""

    # Default suggestions; overridable through the flow settings.
    CHAT_SAMPLES = getattr(
        flowsettings,
        "KH_FEATURE_CHAT_SUGGESTION_SAMPLES",
        [
            "Summary this document",
            "Generate a FAQ for this document",
            "Identify the main highlights in bullet points",
        ],
    )

    def __init__(self, app):
        """Store the owning app and build the widgets right away."""
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the accordion, the default-samples state and the table."""
        # one single-cell row per sample
        self.chat_samples = [[sample] for sample in self.CHAT_SAMPLES]
        feature_enabled = getattr(flowsettings, "KH_FEATURE_CHAT_SUGGESTION", False)

        with gr.Accordion(
            label="Chat Suggestion", visible=feature_enabled
        ) as self.accordion:
            self.default_example = gr.State(value=self.chat_samples)
            self.examples = gr.DataFrame(
                value=self.chat_samples,
                headers=["Next Question"],
                interactive=False,
                elem_id="chat-suggestion",
                wrap=True,
            )

    def as_gradio_component(self):
        """Return the suggestions table component."""
        return self.examples

    def select_example(self, ev: gr.SelectData):
        """Turn the selected cell into a payload carrying its value as text."""
        payload = {"text": ev.value}
        return payload


================================================
FILE: libs/ktem/ktem/pages/chat/common.py
================================================
# Initial application-level chat state: the ``regen`` flag starts off.
DEFAULT_APPLICATION_STATE = dict(regen=False)
# Initial chat state; its "app" entry is the very same dict object as above.
STATE = dict(app=DEFAULT_APPLICATION_STATE)


================================================
FILE: libs/ktem/ktem/pages/chat/control.py
================================================
import logging
import os
from copy import deepcopy

import gradio as gr
from ktem.app import BasePage
from ktem.db.models import Conversation, User, engine
from sqlmodel import Session, or_, select

import flowsettings

from ...utils.conversation import sync_retrieval_n_message
from .chat_suggestion import ChatSuggestion
from .common import STATE

logger = logging.getLogger(__name__)

# Feature switches read from flowsettings; both default to off.
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)
# Icon folder: the path relative to the working directory when it exists,
# otherwise the path inside the repository layout.
ASSETS_DIR = (
    "assets/icons"
    if os.path.isdir("assets/icons")
    else "libs/ktem/ktem/assets/icons"
)


# Browser-side sign-out: drop the stored API key, then go to /logout.
logout_js = """
function () {
    removeFromStorage('google_api_key');
    window.location.href = "/logout";
}
"""


def is_conv_name_valid(name):
    """Validate a conversation name.

    Returns an empty string when the name is acceptable, otherwise the
    error message(s) joined with "; ". A name must be non-empty and at
    most 40 characters long.
    """
    name_length = len(name)
    if name_length == 0:
        problems = ["Name cannot be empty"]
    elif name_length > 40:
        problems = ["Name cannot be longer than 40 characters"]
    else:
        problems = []

    return "; ".join(problems)


class ConversationControl(BasePage):
    """Manage conversation"""

    def __init__(self, app):
        """Keep the logout script and the owning app, then build the widgets."""
        self.logout_js = logout_js
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create every widget of the conversation control panel.

        Components are created in this order: the title row with the
        dark-mode / expand buttons, the conversation dropdown, the option
        row (checkboxes plus rename / delete / new buttons), the demo-mode
        sign-in row, the hidden delete-confirmation row and the hidden
        rename text box. Which buttons exist depends on ``KH_DEMO_MODE``.
        """
        # Title row: heading plus three icon-only buttons.
        with gr.Row():
            title_text = "Conversations" if not KH_DEMO_MODE else "Kotaemon Papers"
            gr.Markdown("## {}".format(title_text))
            self.btn_toggle_dark_mode = gr.Button(
                value="",
                icon=f"{ASSETS_DIR}/dark_mode.svg",
                scale=1,
                size="sm",
                elem_classes=["no-background", "body-text-color"],
                elem_id="toggle-dark-button",
            )
            self.btn_chat_expand = gr.Button(
                value="",
                icon=f"{ASSETS_DIR}/expand.svg",
                scale=1,
                size="sm",
                elem_classes=["no-background", "body-text-color"],
                elem_id="chat-expand-button",
            )
            self.btn_info_expand = gr.Button(
                value="",
                icon=f"{ASSETS_DIR}/expand.svg",
                min_width=2,
                scale=1,
                size="sm",
                elem_classes=["no-background", "body-text-color"],
                elem_id="info-expand-button",
            )

            # Handled entirely in the browser: no Python callback (fn is None).
            self.btn_toggle_dark_mode.click(
                None,
                js="""
                () => {
                    document.body.classList.toggle('dark');
                }
                """,
            )

        # Id of the currently selected conversation, plus the dropdown used
        # to pick one; choices start empty.
        self.conversation_id = gr.State(value="")
        self.conversation = gr.Dropdown(
            label="Chat sessions",
            choices=[],
            container=False,
            filterable=True,
            interactive=True,
            elem_classes=["unset-overflow"],
            elem_id="conversation-dropdown",
        )

        # Option row: checkboxes and the conversation action buttons.
        with gr.Row() as self._new_delete:
            self.cb_suggest_chat = gr.Checkbox(
                value=False,
                label="Suggest chat",
                min_width=10,
                scale=6,
                elem_id="suggest-chat-checkbox",
                container=False,
                visible=not KH_DEMO_MODE,
            )
            # Sharing is hidden in demo mode and when SSO is enabled.
            self.cb_is_public = gr.Checkbox(
                value=False,
                label="Share this conversation",
                elem_id="is-public-checkbox",
                container=False,
                visible=not KH_DEMO_MODE and not KH_SSO_ENABLED,
            )

            if not KH_DEMO_MODE:
                # Regular mode: icon buttons for rename, delete and new.
                self.btn_conversation_rn = gr.Button(
                    value="",
                    icon=f"{ASSETS_DIR}/rename.svg",
                    min_width=2,
                    scale=1,
                    size="sm",
                    elem_classes=["no-background", "body-text-color"],
                )
                self.btn_del = gr.Button(
                    value="",
                    icon=f"{ASSETS_DIR}/delete.svg",
                    min_width=2,
                    scale=1,
                    size="sm",
                    elem_classes=["no-background", "body-text-color"],
                )
                self.btn_new = gr.Button(
                    value="",
                    icon=f"{ASSETS_DIR}/new.svg",
                    min_width=2,
                    scale=1,
                    size="sm",
                    elem_classes=["no-background", "body-text-color"],
                    elem_id="new-conv-button",
                )
            else:
                # Demo mode: only a "New chat" button, created hidden; the
                # rename and delete buttons are not created at all.
                self.btn_new = gr.Button(
                    value="New chat",
                    min_width=120,
                    size="sm",
                    scale=1,
                    variant="primary",
                    elem_id="new-conv-button",
                    visible=False,
                )

        if KH_DEMO_MODE:
            # Demo mode only: sign-in / sign-out buttons.
            with gr.Row():
                self.btn_demo_login = gr.Button(
                    "Sign-in to create new chat",
                    min_width=120,
                    size="sm",
                    scale=1,
                    variant="primary",
                )
                # Opens /login in a new tab, keeping the current query string.
                _js_redirect = """
                () => {
                    url = '/login' + window.location.search;
                    window.open(url, '_blank');
                }
                """
                self.btn_demo_login.click(None, js=_js_redirect)

                self.btn_demo_logout = gr.Button(
                    "Sign-out",
                    min_width=120,
                    size="sm",
                    scale=1,
                    visible=False,
                )

        # Delete confirmation row, created hidden.
        with gr.Row(visible=False) as self._delete_confirm:
            self.btn_del_conf = gr.Button(
                value="Delete",
                variant="stop",
                min_width=10,
            )
            self.btn_del_cnl = gr.Button(value="Cancel", min_width=10)

        # Rename text box, created hidden.
        with gr.Row():
            self.conversation_rn = gr.Text(
                label="(Enter) to save",
                placeholder="Conversation name",
                container=True,
                scale=5,
                min_width=10,
                interactive=True,
                visible=False,
            )

    def load_chat_history(self, user_id):
        """List the conversations visible to ``user_id`` as ``(name, id)`` pairs.

        A known user may also see public conversations: always when
        ``flowsettings.KH_USER_CAN_SEE_PUBLIC`` is falsy, otherwise only when
        their username equals that setting. Public conversations are listed
        first; within each group the newest comes first.
        """
        can_see_public: bool = False
        with Session(engine) as session:
            user = session.exec(select(User).where(User.id == user_id)).one_or_none()

            if user is not None:
                privileged_username = flowsettings.KH_USER_CAN_SEE_PUBLIC
                can_see_public = (
                    not privileged_username or user.username == privileged_username
                )

        print(f"User-id: {user_id}, can see public conversations: {can_see_public}")

        with Session(engine) as session:
            if can_see_public:
                # own conversations plus every public one
                visibility = or_(
                    Conversation.user == user_id,
                    Conversation.is_public,
                )
                ordering = (
                    Conversation.is_public.desc(),  # type: ignore
                    Conversation.date_created.desc(),  # type: ignore
                )
            else:
                # own conversations only
                visibility = Conversation.user == user_id
                ordering = (Conversation.date_created.desc(),)  # type: ignore

            statement = select(Conversation).where(visibility).order_by(*ordering)
            return [(conv.name, conv.id) for conv in session.exec(statement).all()]

    def reload_conv(self, user_id):
        """Refresh the dropdown choices for ``user_id`` and clear the selection."""
        visible_conversations = self.load_chat_history(user_id)
        return gr.update(value=None, choices=visible_conversations or [])

    def new_conv(self, user_id):
        """Create a conversation for ``user_id`` and select it.

        Returns ``(new_id, dropdown_update)``; when nobody is signed in a
        warning is shown and ``(None, no-op update)`` is returned instead.
        """
        if user_id is None:
            gr.Warning("Please sign in first (Settings → User Settings)")
            return None, gr.update()

        with Session(engine) as session:
            conversation = Conversation(user=user_id)
            session.add(conversation)
            session.commit()

            # read the id while the session is still open
            created_id = conversation.id

        choices = self.load_chat_history(user_id)
        return created_id, gr.update(value=created_id, choices=choices)

    def delete_conv(self, conversation_id, user_id):
        """Delete a conversation and move the selection to the first one left.

        Returns ``(selected_id, dropdown_update)``. A warning is shown and
        ``(None, no-op update)`` returned when no conversation is selected
        or nobody is signed in.
        """
        preconditions = (
            (not conversation_id, "No conversation selected."),
            (user_id is None, "Please sign in first (Settings → User Settings)"),
        )
        for failed, message in preconditions:
            if failed:
                gr.Warning(message)
                return None, gr.update()

        with Session(engine) as session:
            lookup = select(Conversation).where(Conversation.id == conversation_id)
            target = session.exec(lookup).one()

            session.delete(target)
            session.commit()

        remaining = self.load_chat_history(user_id)
        if not remaining:
            return None, gr.update(value=None, choices=[])

        # entries are (name, id) pairs; select the first one
        next_id = remaining[0][1]
        return next_id, gr.update(value=next_id, choices=remaining)

    def select_conv(self, conversation_id, user_id):
        """Load a conversation and return the values that populate the chat UI.

        Args:
            conversation_id: id of the conversation to load.
            user_id: id of the current user; stored file selections are only
                restored when this user owns the conversation.

        Returns:
            A tuple ``(id, id, name, chats, chat_suggestions, info_panel,
            plot_data, retrieval_history, plot_history, is_conv_public,
            state, *indices)``. If loading fails for any reason the error is
            logged as a warning and empty defaults are returned instead.
        """
        default_chat_suggestions = [[each] for each in ChatSuggestion.CHAT_SAMPLES]

        with Session(engine) as session:
            statement = select(Conversation).where(Conversation.id == conversation_id)
            try:
                result = session.exec(statement).one()
                id_ = result.id
                name = result.name
                is_conv_public = result.is_public
                data_source = result.data_source

                # disable file selection ids state if
                # not the owner of the conversation
                if user_id == result.user:
                    selected = data_source.get("selected", {})
                else:
                    selected = {}

                chats = data_source.get("messages", [])
                chat_suggestions = data_source.get(
                    "chat_suggestions", default_chat_suggestions
                )

                retrieval_history: list[str] = data_source.get(
                    "retrieval_messages", []
                )
                plot_history: list[dict] = data_source.get("plot_history", [])

                # On initialization
                # Ensure len of retrieval and messages are equal
                retrieval_history = sync_retrieval_n_message(chats, retrieval_history)

                info_panel = (
                    retrieval_history[-1]
                    if retrieval_history
                    else "<h5><b>No evidence found.</b></h5>"
                )
                plot_data = plot_history[-1] if plot_history else None

                # Hand out a private copy of the default state: STATE is a
                # module-level dict, so returning it directly would let any
                # later in-place change leak into every other conversation.
                if "state" in data_source:
                    state = data_source["state"]
                else:
                    state = deepcopy(STATE)

            except Exception as e:
                logger.warning(e)
                id_ = ""
                name = ""
                selected = {}
                chats = []
                chat_suggestions = default_chat_suggestions
                retrieval_history = []
                plot_history = []
                info_panel = ""
                plot_data = None
                state = deepcopy(STATE)
                is_conv_public = False

        # One value per int selector, several per tuple selector; indices
        # without a selector contribute nothing.
        indices = []
        for index in self._app.index_manager.indices:
            # assume that the index has selector
            if index.selector is None:
                continue
            stored = selected.get(str(index.id), index.default_selector)
            if isinstance(index.selector, int):
                indices.append(stored)
            elif isinstance(index.selector, tuple):
                indices.extend(stored)

        return (
            id_,
            id_,
            name,
            chats,
            chat_suggestions,
            info_panel,
            plot_data,
            retrieval_history,
            plot_history,
            is_conv_public,
            state,
            *indices,
        )

    def rename_conv(self, conversation_id, new_name, is_renamed, user_id):
        """Persist a new conversation name and refresh the dropdown choices.

        Returns ``(dropdown_update, conversation_id, hide_update)``. Nothing
        is written when no rename was requested, in demo mode, without a
        signed-in user, without a conversation, or when the name is invalid
        (a warning is shown in that last case).
        """

        def _leave_unchanged():
            # no-op dropdown update, same id, hidden third output
            return gr.update(), conversation_id, gr.update(visible=False)

        rename_allowed = (
            is_renamed
            and not KH_DEMO_MODE
            and user_id is not None
            and conversation_id
        )
        if not rename_allowed:
            return _leave_unchanged()

        validation_message = is_conv_name_valid(new_name)
        if validation_message:
            gr.Warning(validation_message)
            return _leave_unchanged()

        with Session(engine) as session:
            lookup = select(Conversation).where(Conversation.id == conversation_id)
            conversation = session.exec(lookup).one()
            conversation.name = new_name
            session.add(conversation)
            session.commit()

        refreshed_choices = self.load_chat_history(user_id)
        gr.Info("Conversation renamed.")
        return (
            gr.update(choices=refreshed_choices),
            conversation_id,
            gr.update(visible=False),
        )

    def persist_chat_suggestions(
        self, conversation_id, new_suggestions, is_updated, user_id
    ):
        """Store the first column of ``new_suggestions`` on the conversation.

        Does nothing (returns ``None``) unless ``is_updated`` is truthy. When
        nobody is signed in or no conversation is selected, a warning is shown
        and ``(gr.update(), "")`` is returned. On success the rows are saved
        under the ``"chat_suggestions"`` key and ``None`` is returned.
        """
        if not is_updated:
            return None

        problem = None
        if user_id is None:
            problem = "Please sign in first (Settings → User Settings)"
        elif not conversation_id:
            problem = "No conversation selected."

        if problem is not None:
            gr.Warning(problem)
            return gr.update(), ""

        with Session(engine) as session:
            lookup = select(Conversation).where(Conversation.id == conversation_id)
            conversation = session.exec(lookup).one()

            # work on a copy and assign it back as a new object
            updated_source = deepcopy(conversation.data_source)
            first_column = new_suggestions.iloc[:, 0].tolist()
            updated_source["chat_suggestions"] = [
                [question] for question in first_column
            ]

            conversation.data_source = updated_source
            session.add(conversation)
            session.commit()

        gr.Info("Chat suggestions updated.")
        return None

    def toggle_demo_login_visibility(self, user_api_key, request: gr.Request):
        """Return four visibility updates based on the gradiologin user.

        The first three outputs are shown and the fourth hidden when a user
        is found on the request; the opposite otherwise. A missing
        ``gradiologin`` package or a failed assertion counts as "no user".
        ``user_api_key`` is accepted but not used.
        """
        try:
            import gradiologin as grlogin

            user = grlogin.get_user(request)
        except (ImportError, AssertionError):
            user = None

        signed_in = bool(user)  # or user_api_key:
        updates = [gr.update(visible=signed_in) for _ in range(3)]
        updates.append(gr.update(visible=not signed_in))
        return updates

    def _on_app_created(self):
        """Register a load event that refreshes the conversation dropdown."""
        blocks = self._app.app
        blocks.load(
            self.reload_conv,
            inputs=[self._app.user_id],
            outputs=[self.conversation],
        )


================================================
FILE: libs/ktem/ktem/pages/chat/demo_hint.py
================================================
from textwrap import dedent

import gradio as gr
from ktem.app import BasePage


class HintPage(BasePage):
    """Collapsed "Hint" accordion holding a few usage tips."""

    def __init__(self, app):
        """Store the owning app and build the widgets right away."""
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Render the tips as markdown inside an initially closed accordion."""
        hint_markdown = dedent(
            """
                - You can select any text from the chat answer to **highlight relevant citation(s)** on the right panel.
                - **Citations** can be viewed on both PDF viewer and raw text.
                - You can tweak the citation format and use advance (CoT) reasoning in **Chat settings** menu.
                - Want to **explore more**? Check out the **Help** section to create your private space.
            """  # noqa
        )
        with gr.Accordion(label="Hint", open=False):
            gr.Markdown(hint_markdown)


================================================
FILE: libs/ktem/ktem/pages/chat/paper_list.py
================================================
import gradio as gr
from ktem.app import BasePage
from pandas import DataFrame

from ...utils.hf_papers import fetch_papers


class PaperListPage(BasePage):
    """Accordion with a table of papers fetched when the app loads."""

    def __init__(self, app):
        """Store the owning app and build the widgets right away."""
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the papers state and the (initially empty) table; return the table."""
        self.papers_state = gr.State(None)

        column_names = ["title", "url", "upvotes"]
        with gr.Accordion(
            label="Browse popular daily papers", open=True
        ) as self.accordion:
            self.examples = gr.DataFrame(
                value=[],
                headers=column_names,
                column_widths=[60, 30, 10],
                interactive=False,
                elem_id="paper-suggestion",
                wrap=True,
            )
        return self.examples

    def load(self):
        """Fetch the top 5 papers; return them as a DataFrame and as fetched."""
        fetched = fetch_papers(top_n=5)
        return DataFrame(fetched), fetched

    def _on_app_created(self):
        """Register a load event that fills the table and the papers state."""
        self._app.app.load(self.load, outputs=[self.examples, self.papers_state])

    def select_example(self, state, ev: gr.SelectData):
        """Return the ``url`` of the paper on the selected row."""
        row_number = ev.index[0]
        return state[row_number]["url"]


================================================
FILE: libs/ktem/ktem/pages/chat/report.py
================================================
from typing import Optional

import gradio as gr
from ktem.app import BasePage
from ktem.db.models import IssueReport, engine
from sqlmodel import Session


class ReportIssue(BasePage):
    """Feedback accordion that stores issue reports in the database."""

    def __init__(self, app):
        """Store the owning app and build the widgets right away."""
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the feedback form inside an initially closed accordion."""
        correctness_choices = [
            ("The answer is correct", "correct"),
            ("The answer is incorrect", "incorrect"),
        ]
        issue_choices = [
            ("The answer is offensive", "offensive"),
            ("The evidence is incorrect", "wrong-evidence"),
        ]
        detail_hint = (
            "More detail (e.g. how wrong is it, what is the "
            "correct answer, etc...)"
        )
        disclaimer = (
            "This will send the current chat and the user settings to "
            "help with investigation"
        )

        with gr.Accordion(label="Feedback", open=False, elem_id="report-accordion"):
            self.correctness = gr.Radio(
                choices=correctness_choices, label="Correctness:"
            )
            self.issues = gr.CheckboxGroup(choices=issue_choices, label="Other issue:")
            self.more_detail = gr.Textbox(
                placeholder=detail_hint, container=False, lines=3
            )
            gr.Markdown(disclaimer)
            self.report_btn = gr.Button("Report")

    def report(
        self,
        correctness: str,
        issues: list[str],
        more_detail: str,
        conv_id: str,
        chat_history: list,
        settings: dict,
        user_id: Optional[int],
        info_panel: str,
        chat_state: dict,
        *selecteds,
    ):
        """Save a feedback report with the chat context and thank the user.

        ``selecteds`` holds the selector values of all indices; each index
        picks its own value(s) from it by position (an int selector) or by
        positions (a tuple selector).
        """
        selecteds_ = {}
        for index in self._app.index_manager.indices:
            if index.selector is None:
                continue

            index_key = str(index.id)
            if isinstance(index.selector, int):
                selecteds_[index_key] = selecteds[index.selector]
            elif isinstance(index.selector, tuple):
                selecteds_[index_key] = [selecteds[pos] for pos in index.selector]
            else:
                print(f"Unknown selector type: {index.selector}")

        feedback = {
            "correctness": correctness,
            "issues": issues,
            "more_detail": more_detail,
        }
        chat_context = {
            "conv_id": conv_id,
            "chat_history": chat_history,
            "info_panel": info_panel,
            "chat_state": chat_state,
            "selecteds": selecteds_,
        }

        with Session(engine) as session:
            issue = IssueReport(
                issues=feedback,
                chat=chat_context,
                settings=settings,
                user=user_id,
            )
            session.add(issue)
            session.commit()
        gr.Info("Thank you for your feedback")


================================================
FILE: libs/ktem/ktem/pages/help.py
================================================
from importlib.metadata import version
from pathlib import Path

import gradio as gr
import requests
from decouple import config
from theflow.settings import settings

# Demo-mode switch read from the flow settings; off when the setting is absent.
KH_DEMO_MODE = getattr(settings, "KH_DEMO_MODE", False)
# Link target of the "Create Your Own Space" button; empty string when unset.
HF_SPACE_URL = config("HF_SPACE_URL", default="")


def get_remote_doc(url: str, timeout: float = 10.0) -> str:
    """Fetch a text document over HTTP.

    Args:
        url: address of the document.
        timeout: seconds to wait for the server before giving up. Without
            it a stalled connection would block the caller indefinitely
            (``HelpPage`` calls this while it builds its UI).

    Returns:
        The response body, or an empty string on any failure (network error,
        timeout, non-success status). Failures are printed, never raised.
    """
    try:
        res = requests.get(url, timeout=timeout)
        res.raise_for_status()
        return res.text
    except Exception as e:
        print(f"Failed to fetch document from {url}: {e}")
        return ""


def download_changelogs(release_url: str, timeout: float = 10.0) -> str:
    """Fetch the ``body`` field of a release JSON document.

    Args:
        release_url: URL returning a JSON object that carries the release
            notes under the ``"body"`` key.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The release notes, or an empty string when the request fails, the
        server answers with an error status, or ``body`` is missing or not a
        string (e.g. JSON ``null``). Failures are printed, never raised.
    """
    try:
        res = requests.get(release_url, timeout=timeout)
        # error responses are JSON too; treat them as a failure explicitly
        res.raise_for_status()
        changelogs = res.json().get("body")

        # honour the ``-> str`` contract even when the field is null
        return changelogs if isinstance(changelogs, str) else ""
    except Exception as e:
        print(f"Failed to fetch changelogs from {release_url}: {e}")
        return ""


class HelpPage:
    """Help page: About, demo call-to-action, User Guide and Changelogs.

    All Gradio components are created directly in ``__init__``. Documents are
    read from ``doc_dir`` when present and fetched from the remote repository
    otherwise; changelogs are cached on disk per application version.
    """

    def __init__(
        self,
        app,
        doc_dir: str = settings.KH_DOC_DIR,
        remote_content_url: str = "https://raw.githubusercontent.com/Cinnamon/kotaemon",
        app_version: str | None = settings.KH_APP_VERSION,
        changelogs_cache_dir: str
        | Path = (Path(settings.KH_APP_DATA_DIR) / "changelogs"),
    ):
        """Build the help sections.

        Args:
            app: the owning application object.
            doc_dir: local folder that may contain ``about.md`` / ``usage.md``.
            remote_content_url: base URL used when a document is not local.
            app_version: application version; when falsy the version line
                and the changelogs section are skipped.
            changelogs_cache_dir: folder for cached changelogs (created if
                missing).
        """
        self._app = app
        self.doc_dir = Path(doc_dir)
        self.remote_content_url = remote_content_url
        self.app_version = app_version
        self.changelogs_cache_dir = Path(changelogs_cache_dir)

        self.changelogs_cache_dir.mkdir(parents=True, exist_ok=True)

        about_md = self._read_doc("about.md")
        if about_md:
            with gr.Accordion("About"):
                if self.app_version:
                    about_md = f"Version: {self.app_version}\n\n{about_md}"
                gr.Markdown(about_md)

        if KH_DEMO_MODE:
            with gr.Accordion("Create Your Own Space"):
                gr.Markdown(
                    "This is a demo with limited functionality. "
                    "Use **Create space** button to install Kotaemon "
                    "in your own space with all features "
                    "(including upload and manage your private "
                    "documents securely)."
                )
                gr.Button(
                    value="Create Your Own Space",
                    link=HF_SPACE_URL,
                    variant="primary",
                    size="lg",
                )

        user_guide_md = self._read_doc("usage.md")
        if user_guide_md:
            with gr.Accordion("User Guide", open=not KH_DEMO_MODE):
                gr.Markdown(user_guide_md)

        if self.app_version:
            changelogs = self._load_changelogs()
            if changelogs:
                with gr.Accordion(f"Changelogs (v{self.app_version})"):
                    gr.Markdown(changelogs)

    def _read_doc(self, filename: str) -> str:
        """Return a doc's text from ``doc_dir``, else from the remote repository."""
        local_path = self.doc_dir / filename
        if local_path.exists():
            with local_path.open(encoding="utf-8") as fi:
                return fi.read()

        # fetch from remote
        return get_remote_doc(
            f"{self.remote_content_url}/v{self.app_version}/docs/{filename}"
        )

    def _load_changelogs(self) -> str:
        """Return this version's changelogs from the cache, else download them."""
        # The cache file is named after the application version. (The name
        # ``version`` in this module is the importlib.metadata function, so
        # it must not be used to build the path.)
        cache_file = self.changelogs_cache_dir / f"{self.app_version}.md"

        if cache_file.exists():
            # tolerate cache files written with a different encoding
            cached = cache_file.read_text(encoding="utf-8", errors="replace")
            if cached:
                return cached
            # an empty cache file is treated as a miss and refreshed below

        release_url_base = "https://api.github.com/repos/Cinnamon/kotaemon/releases"
        changelogs = download_changelogs(
            release_url=f"{release_url_base}/tags/v{self.app_version}"
        )

        # cache only real content so a failed download is retried next time
        if changelogs:
            cache_file.write_text(changelogs, encoding="utf-8")

        return changelogs or ""


================================================
FILE: libs/ktem/ktem/pages/login.py
================================================
import hashlib

import gradio as gr
from ktem.app import BasePage
from ktem.db.models import User, engine
from ktem.pages.resources.user import create_user
from sqlmodel import Session, select

# Browser-side script: reads the stored username and password through the
# page's getStorage helper (defined outside this file) and returns them,
# followed by a null.
fetch_creds = """
function() {
    const username = getStorage('username', '')
    const password = getStorage('password', '')
    return [username, password, null];
}
"""

# Browser-side script: saves the submitted username and password through the
# page's setStorage helper (defined outside this file) and passes both on.
signin_js = """
function(usn, pwd) {
    setStorage('username', usn);
    setStorage('password', pwd);
    return [usn, pwd];
}
"""


class LoginPage(BasePage):
    """Login form supporting gradiologin users and username/password accounts."""

    public_events = ["onSignIn"]

    def __init__(self, app):
        """Store the owning app and build the widgets right away."""
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the welcome heading and the (initially hidden) login form."""
        gr.Markdown(f"# Welcome to {self._app.app_name}!")
        self.usn = gr.Textbox(label="Username", visible=False)
        self.pwd = gr.Textbox(label="Password", type="password", visible=False)
        self.btn_login = gr.Button("Login", visible=False)

    def on_register_events(self):
        """Wire the login button / password submit to the sign-in chain."""
        credential_inputs = [self.usn, self.pwd]
        sign_in_chain = gr.on(
            triggers=[self.btn_login.click, self.pwd.submit],
            fn=self.login,
            inputs=credential_inputs,
            outputs=[self._app.user_id, self.usn, self.pwd],
            show_progress="hidden",
            js=signin_js,
        )
        sign_in_chain = sign_in_chain.then(
            self.toggle_login_visibility,
            inputs=[self._app.user_id],
            outputs=[self.usn, self.pwd, self.btn_login],
        )
        # append every subscriber of the public sign-in event
        for subscriber in self._app.get_event("onSignIn"):
            sign_in_chain = sign_in_chain.success(**subscriber)

    def toggle_login_visibility(self, user_id):
        """Show the three form widgets only while nobody is signed in."""
        show_form = user_id is None
        return tuple(gr.update(visible=show_form) for _ in range(3))

    def _on_app_created(self):
        """On page load, try to sign in with credentials fetched by the browser."""
        sign_in_chain = self._app.app.load(
            self.login,
            inputs=[self.usn, self.pwd],
            outputs=[self._app.user_id, self.usn, self.pwd],
            show_progress="hidden",
            js=fetch_creds,
        )
        sign_in_chain = sign_in_chain.then(
            self.toggle_login_visibility,
            inputs=[self._app.user_id],
            outputs=[self.usn, self.pwd, self.btn_login],
        )
        # append every subscriber of the public sign-in event
        for subscriber in self._app.get_event("onSignIn"):
            sign_in_chain = sign_in_chain.success(**subscriber)

    def on_subscribe_public_events(self):
        """Re-evaluate the form visibility whenever a sign-out happens."""
        sign_out_handler = {
            "fn": self.toggle_login_visibility,
            "inputs": [self._app.user_id],
            "outputs": [self.usn, self.pwd, self.btn_login],
            "show_progress": "hidden",
        }
        self._app.subscribe_event(name="onSignOut", definition=sign_out_handler)

    def login(self, usn, pwd, request: gr.Request):
        """Authenticate and return ``(user_id, username_value, password_value)``.

        A user found on the request by ``gradiologin`` takes precedence and is
        created in the database on first sight. Otherwise the username and the
        SHA-256 hex digest of the password are matched against the ``User``
        table. On success both text boxes are cleared; on failure ``None`` is
        returned as the id and the entered values are kept.
        """
        try:
            import gradiologin as grlogin

            user = grlogin.get_user(request)
        except (ImportError, AssertionError):
            user = None

        if user:
            user_id = user["sub"]
            with Session(engine) as session:
                lookup = select(User).where(
                    User.id == user_id,
                )
                known_users = session.exec(lookup).all()

            if known_users:
                print("Existing user:", user)
            else:
                print("Creating new user:", user)
                create_user(
                    usn=user["email"],
                    pwd="",
                    user_id=user_id,
                    is_admin=False,
                )
            return user_id, "", ""

        # username/password path: both fields are required
        if not (usn and pwd):
            return None, usn, pwd

        password_digest = hashlib.sha256(pwd.encode()).hexdigest()
        with Session(engine) as session:
            lookup = select(User).where(
                User.username_lower == usn.lower().strip(),
                User.password == password_digest,
            )
            matches = session.exec(lookup).all()
            if matches:
                return matches[0].id, "", ""

            gr.Warning("Invalid username or password")
            return None, usn, pwd


================================================
FILE: libs/ktem/ktem/pages/resources/__init__.py
================================================
import gradio as gr
from ktem.app import BasePage
from ktem.db.models import User, engine
from ktem.embeddings.ui import EmbeddingManagement
from ktem.index.ui import IndexManagement
from ktem.llms.ui import LLMManagement
from ktem.mcp.ui import MCPManagement
from ktem.rerankings.ui import RerankingManagement
from sqlmodel import Session, select

from .user import UserManagement


class ResourcesTab(BasePage):
    """Page that groups the resource-management tabs.

    It renders one tab each for index collections, LLMs, embeddings,
    rerankings and MCP servers. When the app's ``f_user_management`` flag is
    set, an initially hidden "Users" tab is added as well.
    """

    def __init__(self, app):
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Create the tabs; each tab hosts the matching management page."""
        with gr.Tab("Index Collections") as self.index_management_tab:
            self.index_management = IndexManagement(self._app)

        with gr.Tab("LLMs") as self.llm_management_tab:
            self.llm_management = LLMManagement(self._app)

        with gr.Tab("Embeddings") as self.emb_management_tab:
            self.emb_management = EmbeddingManagement(self._app)

        with gr.Tab("Rerankings") as self.rerank_management_tab:
            self.rerank_management = RerankingManagement(self._app)

        with gr.Tab("MCP Servers") as self.mcp_management_tab:
            self.mcp_management = MCPManagement(self._app)

        if not self._app.f_user_management:
            return

        # Hidden by default; toggle_user_management reveals it for admins.
        with gr.Tab("Users", visible=False) as self.user_management_tab:
            self.user_management = UserManagement(self._app)

    def on_subscribe_public_events(self):
        """Re-evaluate the "Users" tab visibility on sign-in and sign-out."""
        if not self._app.f_user_management:
            return

        for event_name in ("onSignIn", "onSignOut"):
            self._app.subscribe_event(
                name=event_name,
                definition={
                    "fn": self.toggle_user_management,
                    "inputs": [self._app.user_id],
                    "outputs": [self.user_management_tab],
                    "show_progress": "hidden",
                },
            )

    def toggle_user_management(self, user_id):
        """Show the user management tab only when ``user_id`` is an admin."""
        with Session(engine) as session:
            account = session.exec(select(User).where(User.id == user_id)).first()
            is_admin = bool(account and account.admin)

        return gr.update(visible=is_admin)


================================================
FILE: libs/ktem/ktem/pages/resources/user.py
================================================
import hashlib

import gradio as gr
import pandas as pd
from ktem.app import BasePage
from ktem.db.models import User, engine
from sqlmodel import Session, select
from theflow.settings import settings as flowsettings

# Markdown help text listing the username constraints. It is rendered with
# gr.Markdown on the "Create user" tab and mirrors validate_username.
USERNAME_RULE = """**Username rule:**

- Username is case-insensitive
- Username must be at least 3 characters long
- Username must be at most 32 characters long
- Username must contain only alphanumeric characters and underscores
"""


# Markdown help text listing the password constraints. It is rendered with
# gr.Markdown on the "Create user" tab and mirrors validate_password.
PASSWORD_RULE = """**Password rule:**

- Password must be at least 8 characters long
- Password must contain at least one uppercase letter
- Password must contain at least one lowercase letter
- Password must contain at least one digit
- Password must contain at least one special character from the following:
    ^ $ * . [ ] { } ( ) ? - " ! @ # % & / \\ , > < ' : ; | _ ~  + =
"""


def validate_username(usn):
    """Check a username against the username rules.

    All rules are evaluated, so several violations can be reported at once.

    Args:
        usn (str): Username

    Returns:
        str: The violated rules joined by "; ", or an empty string when the
            username is acceptable.
    """
    checks = (
        (len(usn) < 3, "Username must be at least 3 characters long"),
        (len(usn) > 32, "Username must be at most 32 characters long"),
        (
            # Underscores are allowed, so drop them before the alnum test.
            not usn.replace("_", "").isalnum(),
            "Username must contain only alphanumeric characters and underscores",
        ),
    )
    return "; ".join(message for failed, message in checks if failed)


def validate_password(pwd, pwd_cnf):
    """Check a password and its confirmation against the password rules.

    Every rule is evaluated, so the result lists all violations at once:

    - the password and its confirmation must be identical
    - at least 8 characters long
    - at least one uppercase letter
    - at least one lowercase letter
    - at least one digit
    - at least one special character from the following:
        ^ $ * . [ ] { } ( ) ? - " ! @ # % & / \\ , > < ' : ; | _ ~  + =

    Args:
        pwd (str): Password
        pwd_cnf (str): Confirm password

    Returns:
        str: The violated rules joined by "; ", or an empty string when the
            password is acceptable.
    """
    special_chars = "^$*.[]{}()?-\"!@#%&/\\,><':;|_~+="
    checks = (
        (pwd != pwd_cnf, "Password does not match"),
        (len(pwd) < 8, "Password must be at least 8 characters long"),
        (
            not any(ch.isupper() for ch in pwd),
            "Password must contain at least one uppercase letter",
        ),
        (
            not any(ch.islower() for ch in pwd),
            "Password must contain at least one lowercase letter",
        ),
        (
            not any(ch.isdigit() for ch in pwd),
            "Password must contain at least one digit",
        ),
        (
            not any(ch in special_chars for ch in pwd),
            "Password must contain at least one special character from the "
            f"following: {special_chars}",
        ),
    )
    return "; ".join(message for failed, message in checks if failed)


def create_user(usn, pwd, user_id=None, is_admin=True) -> bool:
    """Insert a new user unless the username is already taken.

    The uniqueness check is case-insensitive: it compares the lower-cased
    username with the stored ``username_lower`` column.

    Args:
        usn: Username of the new account.
        pwd: Plain-text password; only its SHA-256 hex digest is stored.
        user_id: Explicit id for the new row, passed to ``User(id=...)``.
        is_admin: Whether the new account gets the admin flag.

    Returns:
        bool: True when the user was created, False when the username exists.
    """
    # NOTE(review): the digest is a plain, unsalted SHA-256. The login check
    # compares against the same digest, so changing it needs a migration.
    username_key = usn.lower()
    with Session(engine) as session:
        taken = session.exec(
            select(User).where(User.username_lower == username_key)
        ).all()
        if taken:
            print(f'User "{usn}" already exists')
            return False

        session.add(
            User(
                id=user_id,
                username=usn,
                username_lower=username_key,
                password=hashlib.sha256(pwd.encode()).hexdigest(),
                admin=is_admin,
            )
        )
        session.commit()

    return True


class UserManagement(BasePage):
    def __init__(self, app):
        """Build the UI and create the configured bootstrap account, if any.

        The bootstrap account is created only when flowsettings defines both
        ``KH_FEATURE_USER_MANAGEMENT_ADMIN`` and
        ``KH_FEATURE_USER_MANAGEMENT_PASSWORD``.
        """
        self._app = app
        self.on_building_ui()

        bootstrap_keys = (
            "KH_FEATURE_USER_MANAGEMENT_ADMIN",
            "KH_FEATURE_USER_MANAGEMENT_PASSWORD",
        )
        if not all(hasattr(flowsettings, key) for key in bootstrap_keys):
            return

        admin_name = flowsettings.KH_FEATURE_USER_MANAGEMENT_ADMIN
        admin_password = flowsettings.KH_FEATURE_USER_MANAGEMENT_PASSWORD
        if create_user(admin_name, admin_password):
            gr.Info(f'User "{admin_name}" created successfully')

    def on_building_ui(self):
        """Render the two tabs of the page: "User list" and "Create user".

        The "User list" tab holds the read-only user table plus an edit panel
        and a button row; both start hidden. The "Create user" tab holds the
        sign-up form and the username/password rule texts.
        """
        with gr.Tab(label="User list"):
            # Holds the first value returned by list_users.
            self.state_user_list = gr.State(value=None)
            self.user_list = gr.DataFrame(
                headers=["id", "name", "admin"],
                column_widths=[0, 50, 50],
                interactive=False,
            )

            # Edit form for the selected user; hidden until a row is selected.
            with gr.Group(visible=False) as self._selected_panel:
                # -1 is the "nothing selected" sentinel used across this page.
                self.selected_user_id = gr.State(value=-1)
                self.usn_edit = gr.Textbox(label="Username")
                with gr.Row():
                    self.pwd_edit = gr.Textbox(label="Change password", type="password")
                    self.pwd_cnf_edit = gr.Textbox(
                        label="Confirm change password",
                        type="password",
                    )
                self.admin_edit = gr.Checkbox(label="Admin")

            # Save / Delete / Close buttons for the selected user.
            with gr.Row(visible=False) as self._selected_panel_btn:
                with gr.Column():
                    self.btn_edit_save = gr.Button("Save")
                with gr.Column():
                    self.btn_delete = gr.Button("Delete")
                    # Two-step delete: these replace "Delete" once it is clicked.
                    with gr.Row():
                        self.btn_delete_yes = gr.Button(
                            "Confirm delete", variant="primary", visible=False
                        )
                        self.btn_delete_no = gr.Button("Cancel", visible=False)
                with gr.Column():
                    self.btn_close = gr.Button("Close")

        with gr.Tab(label="Create user"):
            self.usn_new = gr.Textbox(label="Username", interactive=True)
            self.pwd_new = gr.Textbox(
                label="Password", type="password", interactive=True
            )
            self.pwd_cnf_new = gr.Textbox(
                label="Confirm password", type="password", interactive=True
            )
            with gr.Row():
                gr.Markdown(USERNAME_RULE)
                gr.Markdown(PASSWORD_RULE)
            self.btn_new = gr.Button("Create user")

    def on_register_events(self):
        """Wire the widgets of both tabs to their handlers.

        Creating, deleting and saving a user are each followed by a reload of
        the user table through ``list_users``.
        """

        def refresh_user_list():
            # Fresh kwargs per registration so no list object is shared
            # between the chained `.then(...)` calls.
            return {
                "fn": self.list_users,
                "inputs": self._app.user_id,
                "outputs": [self.state_user_list, self.user_list],
            }

        # "Create user" tab
        self.btn_new.click(
            self.create_user,
            inputs=[self.usn_new, self.pwd_new, self.pwd_cnf_new],
            outputs=[self.usn_new, self.pwd_new, self.pwd_cnf_new],
        ).then(**refresh_user_list())

        # Row selection -> selected id -> edit panel
        self.user_list.select(
            self.select_user,
            inputs=self.user_list,
            outputs=[self.selected_user_id],
            show_progress="hidden",
        )
        panel_outputs = [
            self._selected_panel,
            self._selected_panel_btn,
            # delete section
            self.btn_delete,
            self.btn_delete_yes,
            self.btn_delete_no,
            # edit section
            self.usn_edit,
            self.pwd_edit,
            self.pwd_cnf_edit,
            self.admin_edit,
        ]
        self.selected_user_id.change(
            self.on_selected_user_change,
            inputs=[self.selected_user_id],
            outputs=panel_outputs,
            show_progress="hidden",
        )

        # Two-step delete: "Delete" -> "Confirm delete" / "Cancel"
        self.btn_delete.click(
            self.on_btn_delete_click,
            inputs=[self.selected_user_id],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        self.btn_delete_yes.click(
            self.delete_user,
            inputs=[self._app.user_id, self.selected_user_id],
            outputs=[self.selected_user_id],
            show_progress="hidden",
        ).then(**refresh_user_list())
        self.btn_delete_no.click(
            lambda: (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ),
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )

        # Edit panel
        edit_inputs = [
            self.selected_user_id,
            self.usn_edit,
            self.pwd_edit,
            self.pwd_cnf_edit,
            self.admin_edit,
        ]
        self.btn_edit_save.click(
            self.save_user,
            inputs=edit_inputs,
            outputs=[self.pwd_edit, self.pwd_cnf_edit],
            show_progress="hidden",
        ).then(**refresh_user_list())

        # Closing the panel just resets the selection to the sentinel.
        self.btn_close.click(
            lambda: -1,
            outputs=[self.selected_user_id],
        )

    def on_subscribe_public_events(self):
        """Subscribe to the app-wide sign-in and sign-out events.

        On sign-in the user table is loaded through ``list_users``. On
        sign-out the "Create user" form, the cached user list, the table and
        the current selection are all reset.
        """
        load_on_sign_in = {
            "fn": self.list_users,
            "inputs": [self._app.user_id],
            "outputs": [self.state_user_list, self.user_list],
        }
        reset_on_sign_out = {
            # One value per output below, in the same order.
            "fn": lambda: ("", "", "", None, None, -1),
            "outputs": [
                self.usn_new,
                self.pwd_new,
                self.pwd_cnf_new,
                self.state_user_list,
                self.user_list,
                self.selected_user_id,
            ],
        }

        self._app.subscribe_event(name="onSignIn", definition=load_on_sign_in)
        self._app.subscribe_event(name="onSignOut", definition=reset_on_sign_out)

    def create_user(self, usn, pwd, pwd_cnf):
        """Validate the "Create user" form and insert the new account.

        Args:
            usn: Requested username.
            pwd: Requested password.
            pwd_cnf: Password confirmation.

        Returns:
            tuple: New values for the username, password and confirmation
            textboxes. They are blank after a successful creation and are
            returned unchanged when validation fails or the username exists.
        """
        # Username problems are reported first; the password is only checked
        # once the username is valid.
        errors = validate_username(usn) or validate_password(pwd, pwd_cnf)
        if errors:
            gr.Warning(errors)
            return usn, pwd, pwd_cnf

        with Session(engine) as session:
            statement = select(User).where(User.username_lower == usn.lower())
            if session.exec(statement).first() is not None:
                gr.Warning(f'Username "{usn}" already exists')
                # The click handler feeds three outputs, so every exit path
                # must return three values; keep the form as typed.
                return usn, pwd, pwd_cnf

            hashed_password = hashlib.sha256(pwd.encode()).hexdigest()
            user = User(
                username=usn, username_lower=usn.lower(), password=hashed_password
            )
            session.add(user)
            session.commit()
            gr.Info(f'User "{usn}" created successfully')

        return "", "", ""

    def list_users(self, user_id):
        """List all users, but only for a signed-in administrator.

        Args:
            user_id: Id of the user asking for the list; ``None`` when nobody
                is signed in.

        Returns:
            tuple: ``(records, table)``. ``records`` is a list of
            ``{"id", "username", "admin"}`` dicts and ``table`` is the same
            data as a DataFrame. When the requester is missing, unknown or not
            an admin, ``records`` is empty and ``table`` is a one-row
            placeholder filled with "-".
        """
        placeholder = pd.DataFrame.from_records(
            [{"id": "-", "username": "-", "admin": "-"}]
        )
        if user_id is None:
            return [], placeholder

        with Session(engine) as session:
            statement = select(User).where(User.id == user_id)
            # `.first()` rather than `.one()`: an id without a matching row
            # (e.g. a deleted account) is treated like a non-admin instead of
            # raising inside the event handler.
            requester = session.exec(statement).first()
            if requester is None or not requester.admin:
                return [], placeholder

            results = [
                {"id": user.id, "username": user.username, "admin": user.admin}
                for user in session.exec(select(User)).all()
            ]

        if not results:
            return results, placeholder

        return results, pd.DataFrame.from_records(results)

    def select_user(self, user_list, ev: gr.SelectData):
        """Turn a click on the user table into the id of the selected user.

        Args:
            user_list: Current content of the user table.
            ev: Selection event data supplied by Gradio.

        Returns:
            The ``id`` cell of the clicked row, or -1 when the placeholder row
            was clicked or the event is a deselection.
        """
        # A "-" cell in the first row means the table only holds the
        # placeholder row.
        clicked_placeholder = ev.value == "-" and ev.index[0] == 0
        if clicked_placeholder:
            gr.Info("No user is loaded. Please refresh the user list")
            return -1

        if ev.selected:
            return user_list["id"][ev.index[0]]

        return -1

    def on_selected_user_change(self, selected_user_id):
        """Show or hide the edit panel to match the current selection.

        Args:
            selected_user_id: Id of the selected user, or -1 for "none".

        Returns:
            tuple: Nine updates, in order: edit panel, button row, "Delete",
            "Confirm delete", "Cancel", username box, password box, password
            confirmation box and admin checkbox.
        """
        has_selection = not (selected_user_id == -1)

        username, is_admin = "", False
        if has_selection:
            with Session(engine) as session:
                account = session.exec(
                    select(User).where(User.id == selected_user_id)
                ).one()
            username, is_admin = account.username, account.admin

        return (
            gr.update(visible=has_selection),  # edit panel
            gr.update(visible=has_selection),  # button row
            # The delete confirmation always starts collapsed.
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=username),
            # Password boxes are always cleared.
            gr.update(value=""),
            gr.update(value=""),
            gr.update(value=is_admin),
        )

    def on_btn_delete_click(self, selected_user_id):
        """Swap the "Delete" button for the confirm/cancel pair.

        Args:
            selected_user_id: Id of the selected user; ``None`` or -1 means
                that no user is selected.

        Returns:
            tuple: Updates for "Delete", "Confirm delete" and "Cancel", in
            that order.
        """
        if selected_user_id is None or selected_user_id == -1:
            gr.Warning("No user is selected")
            # The handler feeds three outputs, so the unchanged button state
            # has to be returned explicitly rather than falling out as None.
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            )

        return (
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=True),
        )

    def save_user(self, selected_user_id, usn, pwd, pwd_cnf, admin):
        """Apply the edit form to the selected user.

        The password is only validated and changed when a new one was typed.

        Args:
            selected_user_id: Id of the user being edited.
            usn: New username.
            pwd: New password, or empty to keep the current one.
            pwd_cnf: Confirmation of the new password.
            admin: New value of the admin flag.

        Returns:
            tuple: New values for the two password boxes. They are blank after
            a successful save and unchanged when the save was rejected.
        """
        errors = validate_username(usn)
        if not errors and pwd:
            errors = validate_password(pwd, pwd_cnf)
        if errors:
            gr.Warning(errors)
            return pwd, pwd_cnf

        normalized = usn.lower()
        with Session(engine) as session:
            # Check username uniqueness (excluding current user)
            clash = session.exec(
                select(User).where(
                    User.username_lower == normalized,
                    User.id != selected_user_id,
                )
            ).first()
            if clash:
                gr.Warning(
                    f'Username "{usn}" already exists. Please use a unique name.'
                )
                return pwd, pwd_cnf

            target = session.exec(
                select(User).where(User.id == selected_user_id)
            ).one()
            target.username = usn
            target.username_lower = normalized
            target.admin = admin
            if pwd:
                target.password = hashlib.sha256(pwd.encode()).hexdigest()
            session.commit()
            gr.Info(f'User "{usn}" updated successfully')

        return "", ""

    def delete_user(self, current_user, selected_user_id):
        """Delete the selected user, refusing to delete the caller.

        Args:
            current_user: Id of the signed-in user.
            selected_user_id: Id of the user to delete.

        Returns:
            -1 (selection cleared) after a successful delete, or the unchanged
            ``selected_user_id`` when the caller tried to delete themselves.
        """
        if current_user == selected_user_id:
            gr.Warning("You cannot delete yourself")
            return selected_user_id

        with Session(engine) as session:
            doomed = session.exec(
                select(User).where(User.id == selected_user_id)
            ).one()
            session.delete(doomed)
            session.commit()
            gr.Info(f'User "{doomed.username}" deleted successfully')

        return -1


================================================
FILE: libs/ktem/ktem/pages/settings.py
================================================
import hashlib

import gradio as gr
from ktem.app import BasePage
from ktem.components import reasonings
from ktem.db.models import Settings, User, engine
from sqlmodel import Session, select
from theflow.settings import settings as flowsettings

# Read once at import time; False when flowsettings does not define it.
KH_SSO_ENABLED = getattr(flowsettings, "KH_SSO_ENABLED", False)


# JavaScript passed as `js=` to the logout click handler. It calls
# removeFromStorage for 'username' and 'password', then returns its four
# arguments unchanged.
signout_js = """
function(u, c, pw, pwc) {
    removeFromStorage('username');
    removeFromStorage('password');
    return [u, c, pw, pwc];
}
"""


# Setting component kinds that render_setting_item builds without `choices`.
gr_cls_single_value = {
    "text": gr.Textbox,
    "number": gr.Number,
    "checkbox": gr.Checkbox,
}


# Setting component kinds that additionally receive the setting's `choices`.
gr_cls_choices = {
    "dropdown": gr.Dropdown,
    "radio": gr.Radio,
    "checkboxgroup": gr.CheckboxGroup,
}


def render_setting_item(setting_item, value):
    """Create the Gradio component that edits one setting.

    Args:
        setting_item: Setting description; its ``name``, ``component`` and
            ``choices`` attributes are read.
        value: Initial value of the component.

    Returns:
        The interactive Gradio component for ``setting_item.component``.

    Raises:
        ValueError: If ``setting_item.component`` is not a known kind.
    """
    options = {
        "label": setting_item.name,
        "value": value,
        "interactive": True,
    }
    component = setting_item.component

    if component in gr_cls_single_value:
        return gr_cls_single_value[component](**options)

    # Every remaining kind is choice-based.
    options["choices"] = setting_item.choices
    if component in gr_cls_choices:
        return gr_cls_choices[component](**options)

    allowed = list(gr_cls_single_value.keys()) + list(gr_cls_choices.keys())
    raise ValueError(
        f"Unknown component {component}, allowed are: "
        f"{allowed}.\n"
        f"Setting item: {setting_item}"
    )


class SettingsPage(BasePage):
    """Responsible for allowing the users to customize the application

    **IMPORTANT**: the name and id of the UI setting components should match the
    name of the setting in the `app.default_settings`
    """

    public_events = ["onSignOut"]

    def __init__(self, app):
        """Initiate the page and render the UI"""
        self._app = app

        self._settings_state = app.settings_state
        self._user_id = app.user_id
        self._default_settings = app.default_settings
        self._settings_dict = self._default_settings.flatten()
        self._settings_keys = list(self._settings_dict.keys())

        self._components = {}
        self._reasoning_mode = {}

        # store llms and embeddings components
        self._llms = []
        self._embeddings = []

        # With SSO enabled none of the setting tabs is rendered.
        sso_off = not KH_SSO_ENABLED
        defaults = self._default_settings

        # application tab: needs at least one application setting
        self._render_app_tab = bool(sso_off and defaults.application.settings)

        # index tab: needs general index settings or settings on any index
        self._render_index_tab = sso_off and (
            bool(defaults.index.settings)
            or any(bool(sig.settings) for sig in defaults.index.options.values())
        )

        # reasoning tab: needs more than one general reasoning setting, or
        # settings on any reasoning option
        self._render_reasoning_tab = sso_off and (
            len(defaults.reasoning.settings) > 1
            or any(bool(sig.settings) for sig in defaults.reasoning.options.values())
        )

        self.on_building_ui()

    def on_building_ui(self):
        """Render the page.

        The save button is rendered unless SSO is enabled, the "User settings"
        tab only when user management is on, and then the application, index
        and reasoning tabs in that order.
        """
        if not KH_SSO_ENABLED:
            self.setting_save_btn = gr.Button(
                "Save & Close",
                variant="primary",
                elem_classes=["right-button"],
                elem_id="save-setting-btn",
            )

        if self._app.f_user_management:
            with gr.Tab("User settings"):
                self.user_tab()

        for build_tab in (self.app_tab, self.index_tab, self.reasoning_tab):
            build_tab()

    def on_subscribe_public_events(self):
        """Subscribe to ``onSignIn`` when user management is enabled.

        Two handlers are registered, both with a hidden progress indicator:

        * ``load_setting`` receives the user id and fills the settings state
          and every setting component.
        * ``get_name`` receives the user id and sets the "Current user: ..."
          label. It falls back to "Current user: ___" when there is no user id
          or no matching user.
        """
        if not self._app.f_user_management:
            return

        self._app.subscribe_event(
            name="onSignIn",
            definition={
                "fn": self.load_setting,
                "inputs": self._user_id,
                "outputs": [self._settings_state] + self.components(),
                "show_progress": "hidden",
            },
        )

        def get_name(user_id):
            prefix = "Current user: "
            if not user_id:
                return prefix + "___"

            with Session(engine) as session:
                rows = session.exec(select(User).where(User.id == user_id)).all()
                if rows:
                    return prefix + rows[0].username

            return prefix + "___"

        self._app.subscribe_event(
            name="onSignIn",
            definition={
                "fn": get_name,
                "inputs": self._user_id,
                "outputs": [self.current_name],
                "show_progress": "hidden",
            },
        )

    def on_register_events(self):
        """Wire the page's widgets to their handlers.

        The save button and the password/logout controls are only wired when
        SSO is disabled; the latter also require user management to be on.
        """
        if not KH_SSO_ENABLED:
            # Save the settings, then switch to the tab with id "chat-tab".
            self.setting_save_btn.click(
                self.save_setting,
                inputs=[self._user_id] + self.components(),
                outputs=self._settings_state,
            ).then(
                lambda: gr.Tabs(selected="chat-tab"),
                outputs=self._app.tabs,
            )
        # Show only the option group that belongs to the chosen reasoning mode.
        self._components["reasoning.use"].change(
            self.change_reasoning_mode,
            inputs=[self._components["reasoning.use"]],
            outputs=list(self._reasoning_mode.values()),
            show_progress="hidden",
        )
        if self._app.f_user_management and not KH_SSO_ENABLED:
            self.password_change_btn.click(
                self.change_password,
                inputs=[
                    self._user_id,
                    self.password_change,
                    self.password_change_confirm,
                ],
                outputs=[self.password_change, self.password_change_confirm],
                show_progress="hidden",
            )
            # Logout: clear the user id, the name label and the password boxes
            # (signout_js runs alongside), then reload the settings for the
            # now-empty user id.
            onSignOutClick = self.signout.click(
                lambda: (None, "Current user: ___", "", ""),
                inputs=[],
                outputs=[
                    self._user_id,
                    self.current_name,
                    self.password_change,
                    self.password_change_confirm,
                ],
                show_progress="hidden",
                js=signout_js,
            ).then(
                self.load_setting,
                inputs=self._user_id,
                outputs=[self._settings_state] + self.components(),
                show_progress="hidden",
            )
            # Chain every handler that other pages subscribed to "onSignOut".
            for event in self._app.get_event("onSignOut"):
                onSignOutClick = onSignOutClick.then(**event)

    def user_tab(self):
        """Render the user section: current user label and account controls.

        With SSO enabled only a ``gradiologin`` logout button is shown.
        Otherwise a plain logout button and the change-password form are
        rendered.
        """
        self.current_name = gr.Markdown("Current user: ___")

        if KH_SSO_ENABLED:
            import gradiologin as grlogin

            self.sso_signout = grlogin.LogoutButton("Logout")
            return

        self.signout = gr.Button("Logout")

        self.password_change = gr.Textbox(
            label="New password", interactive=True, type="password"
        )
        self.password_change_confirm = gr.Textbox(
            label="Confirm password", interactive=True, type="password"
        )
        self.password_change_btn = gr.Button("Change password", interactive=True)

    def change_password(self, user_id, password, password_confirm):
        """Change the password of ``user_id`` after validating the new one.

        Args:
            user_id: Id of the user whose password is changed.
            password: New password.
            password_confirm: Confirmation of the new password.

        Returns:
            tuple: New values for the two password boxes. They are unchanged
            when validation fails and blank otherwise.
        """
        from ktem.pages.resources.user import validate_password

        problems = validate_password(password, password_confirm)
        if problems:
            print(problems)
            gr.Warning(problems)
            return password, password_confirm

        with Session(engine) as session:
            matches = session.exec(select(User).where(User.id == user_id)).all()
            if not matches:
                gr.Warning("User not found")
            else:
                account = matches[0]
                account.password = hashlib.sha256(password.encode()).hexdigest()
                session.add(account)
                session.commit()
                gr.Info("Password changed")

        return "", ""

    def app_tab(self):
        """Render the "General" tab with one component per application setting.

        Each component is stored under ``application.<name>``. Components
        whose setting is of special type "llm" or "embedding" are also tracked
        in ``self._llms`` / ``self._embeddings``.
        """
        with gr.Tab("General", visible=self._render_app_tab):
            for name, item in self._default_settings.application.settings.items():
                widget = render_setting_item(item, item.value)
                self._components[f"application.{name}"] = widget

                special = item.special_type
                if special == "llm":
                    self._llms.append(widget)
                if special == "embedding":
                    self._embeddings.append(widget)

    def index_tab(self):
        """Render the "Retrieval settings" tab with one sub-tab per index.

        Each component is stored under ``index.options.<index id>.<name>``.
        Components whose setting is of special type "llm" or "embedding" are
        also tracked in ``self._llms`` / ``self._embeddings``.
        """
        # TODO: double check if we need a "General" sub-tab that renders
        # self._default_settings.index.settings under "index.<name>".

        def register(key, item):
            # Render one setting and remember its component.
            widget = render_setting_item(item, item.value)
            self._components[key] = widget
            if item.special_type == "llm":
                self._llms.append(widget)
            if item.special_type == "embedding":
                self._embeddings.append(widget)

        index_names = {
            index_id: info.name
            for index_id, info in self._app.index_manager.info().items()
        }
        with gr.Tab("Retrieval settings", visible=self._render_index_tab):
            for index_id, group in self._default_settings.index.options.items():
                # Fall back to the raw id for indices the manager doesn't list.
                title = index_names.get(index_id, f"<id {index_id}>")
                with gr.Tab(title):
                    for name, item in group.settings.items():
                        register(f"index.options.{index_id}.{name}", item)

    def reasoning_tab(self):
        """Render the "Reasoning settings" tab.

        The tab holds the general reasoning settings (all except ``use``), the
        ``reasoning.use`` selector, and one group per reasoning option. Only
        the first option group starts visible.
        """

        def register(key, item):
            # Render one setting and remember its component.
            widget = render_setting_item(item, item.value)
            self._components[key] = widget
            if item.special_type == "llm":
                self._llms.append(widget)
            if item.special_type == "embedding":
                self._embeddings.append(widget)

        reasoning_cfg = self._default_settings.reasoning
        with gr.Tab("Reasoning settings", visible=self._render_reasoning_tab):
            with gr.Group():
                for name, item in reasoning_cfg.settings.items():
                    # "use" is rendered separately below.
                    if name != "use":
                        register(f"reasoning.{name}", item)

            gr.Markdown("### Reasoning-specific settings")
            self._components["reasoning.use"] = render_setting_item(
                reasoning_cfg.settings["use"],
                reasoning_cfg.settings["use"].value,
            )

            for position, (option_id, group) in enumerate(
                reasoning_cfg.options.items()
            ):
                # elem_id carries the option id; change_reasoning_mode compares
                # against it.
                with gr.Group(
                    visible=position == 0,
                    elem_id=option_id,
                ) as self._reasoning_mode[option_id]:
                    reasoning = reasonings.get(option_id, None)
                    if reasoning is not None:
                        info = reasoning.get_info()
                        gr.Markdown(f"**{info['name']}**: {info['description']}")
                    else:
                        gr.Markdown("**Name**: Description")

                    for name, item in group.settings.items():
                        register(f"reasoning.options.{option_id}.{name}", item)

    def change_reasoning_mode(self, value):
        """Show the option group whose ``elem_id`` equals ``value``; hide the rest.

        Returns:
            list: One visibility update per group in ``self._reasoning_mode``,
            in insertion order.
        """
        return [
            gr.update(visible=True)
            if value == group.elem_id
            else gr.update(visible=False)
            for group in self._reasoning_mode.values()
        ]

    def load_setting(self, user_id=None):
        """Load the settings of a user, falling back to the defaults.

        Args:
            user_id: Id of the user whose stored settings are requested.

        Returns:
            list: The settings dict first, followed by the value of every name
            in ``component_names()``, in that order.
        """
        settings = self._settings_dict
        with Session(engine) as session:
            statement = select(Settings).where(Settings.user == user_id)
            stored = session.exec(statement).first()
            if stored is not None:
                # Overlay the stored values on the defaults. Settings saved
                # before a new key was added to the defaults would otherwise
                # raise KeyError in the lookup below.
                settings = {**self._settings_dict, **(stored.setting or {})}

        return [settings, *(settings[name] for name in self.component_names())]

    def save_setting(self, user_id: int, *args):
        """Save the setting to disk and persist the setting to session state

        Args:
            user_id: the user id
            args: all the values from the settings, in ``component_names()``
                order

        Returns:
            dict: The settings keyed by component name. It is returned but not
            stored when ``user_id`` is None.
        """
        setting = dict(zip(self.component_names(), args))
        if user_id is None:
            gr.Warning("Need to login before saving settings")
            return setting

        with Session(engine) as session:
            statement = select(Settings).where(Settings.user == user_id)
            # `.first()` plus an explicit None check replaces a broad
            # `except Exception` around `.one()`, which also swallowed real
            # database errors and added another row when duplicates existed.
            user_setting = session.exec(statement).first()
            if user_setting is None:
                user_setting = Settings()
                user_setting.user = user_id
            user_setting.setting = setting
            session.add(user_setting)
            session.commit()

        gr.Info("Setting saved")
        return setting

    def components(self) -> list:
        """Get the setting components, ordered like ``self._settings_keys``"""
        return [self._components[name] for name in self._settings_keys]

    def component_names(self):
        """Get the names (flattened setting keys) of the setting components"""
        return self._settings_keys

    def _on_app_created(self):
        """Register the page-load handlers.

        Without user management, the settings are loaded on page load. In all
        cases the choices of every tracked LLM and embedding component are
        refreshed on page load.
        """
        if not self._app.f_user_management:
            self._app.app.load(
                self.load_setting,
                inputs=self._user_id,
                outputs=[self._settings_state] + self.components(),
                show_progress="hidden",
            )

        def update_llms():
            from ktem.llms.manager import llms

            # The empty value stands for the default model, or a random one
            # when no default is set.
            first_label = f"{llms._default} (default)" if llms._default else "(random)"
            llm_choices = [(first_label, "")]
            llm_choices += [(name, name) for name in llms.options().keys()]
            return gr.update(choices=llm_choices)

        def update_embeddings():
            from ktem.embeddings.manager import embedding_models_manager

            if embedding_models_manager._default:
                first_label = f"{embedding_models_manager._default} (default)"
            else:
                first_label = "(random)"
            emb_choices = [(first_label, "")]
            emb_choices += [
                (name, name) for name in embedding_models_manager.options().keys()
            ]
            return gr.update(choices=emb_choices)

        for llm_component in self._llms:
            self._app.app.load(
                update_llms,
                inputs=[],
                outputs=[llm_component],
                show_progress="hidden",
            )
        for emb_component in self._embeddings:
            self._app.app.load(
                update_embeddings,
                inputs=[],
                outputs=[emb_component],
                show_progress="hidden",
            )


================================================
FILE: libs/ktem/ktem/pages/setup.py
================================================
import json

import gradio as gr
import requests
from decouple import config
from ktem.app import BasePage
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from ktem.rerankings.manager import reranking_models_manager as rerankers
from theflow.settings import settings as flowsettings

# OpenAI-compatible Ollama endpoint, used as `base_url` of the LLM / embedding
# specs registered by the setup page.
KH_OLLAMA_URL = getattr(flowsettings, "KH_OLLAMA_URL", "http://localhost:11434/v1/")

# Native Ollama REST endpoint (used for `/pull`), derived from the URL above and
# stored without a trailing slash.
# Only the trailing `/v1` path segment is swapped for `/api`: a blanket
# `str.replace("v1", "api")` would also rewrite a "v1" that happens to appear in
# the host name or elsewhere in the path.
_ollama_base = KH_OLLAMA_URL[:-1] if KH_OLLAMA_URL.endswith("/") else KH_OLLAMA_URL
if _ollama_base.endswith("/v1"):
    DEFAULT_OLLAMA_URL = _ollama_base[: -len("v1")] + "api"
else:
    # URL does not follow the `<host>/v1` layout: keep the historical behaviour
    DEFAULT_OLLAMA_URL = _ollama_base.replace("v1", "api")


DEMO_MESSAGE = (
    "This is a public space. Please use the "
    '"Duplicate Space" function on the top right '
    "corner to setup your own space."
)


def pull_model(name: str, stream: bool = True):
    """Ask the Ollama server to pull (download) a model and report the progress.

    This is a generator function: nothing is sent until it is iterated.

    Args:
        name: name of the model to pull.
        stream: if True, yield every JSON status line as it arrives and stop
            after the line whose `status` is `"success"`; if False, request a
            single JSON response and yield it once.

    Yields:
        dict: the decoded JSON status payload(s) sent by the server.

    Returns:
        The last payload received (available as `StopIteration.value`), or
        None when the server sent nothing.

    Raises:
        requests.HTTPError: if the server answers with an error status code.
    """
    # `stream` is forwarded in the body as well, so that the server answers with
    # one JSON object (instead of JSON lines) when streaming is turned off.
    payload = {"name": name, "stream": stream}
    headers = {"Content-Type": "application/json"}

    # always bound, even when the stream carries no line at all
    data = None

    # the context manager releases the connection even if the consumer stops
    # iterating early or an error occurs
    with requests.post(
        DEFAULT_OLLAMA_URL + "/pull", json=payload, headers=headers, stream=stream
    ) as response:
        # Check if the request was successful
        response.raise_for_status()

        if stream:
            for line in response.iter_lines():
                if not line:
                    # skip empty lines
                    continue
                data = json.loads(line.decode("utf-8"))
                yield data
                if data.get("status") == "success":
                    break
        else:
            data = response.json()
            # a generator's `return` value is invisible to a `for` loop, so the
            # single response has to be yielded to reach the caller
            yield data

    return data


class SetupPage(BasePage):
    """First-time setup page.

    Lets the user pick a model provider (Cohere, Google, OpenAI or a local
    Ollama server), registers the matching default LLM / embedding (and, for
    Cohere, reranking) models, then checks that the models answer.

    Handlers that other pages subscribed to the `onFirstSetupComplete` event
    are chained after a successful setup and after "skip".
    """

    public_events = ["onFirstSetupComplete"]

    def __init__(self, app):
        self._app = app
        self.on_building_ui()

    def on_building_ui(self):
        """Build the widgets: provider radio, one option column per provider,
        the setup log and the proceed / skip buttons."""
        gr.Markdown(f"# Welcome to {self._app.app_name} first setup!")
        self.radio_model = gr.Radio(
            [
                ("Cohere API (*free registration*) - recommended", "cohere"),
                ("Google API (*free registration*)", "google"),
                ("OpenAI API (for GPT-based models)", "openai"),
                ("Local LLM (for completely *private RAG*)", "ollama"),
            ],
            label="Select your model provider",
            value="cohere",
            info=(
                "Note: You can change this later. "
                "If you are not sure, go with the first option "
                "which fits most normal users."
            ),
            interactive=True,
        )

        with gr.Column(visible=False) as self.openai_option:
            gr.Markdown(
                (
                    "#### OpenAI API Key\n\n"
                    "(create at https://platform.openai.com/api-keys)"
                )
            )
            self.openai_api_key = gr.Textbox(
                show_label=False, placeholder="OpenAI API Key"
            )

        # visible by default because "cohere" is the preselected provider
        with gr.Column(visible=True) as self.cohere_option:
            gr.Markdown(
                (
                    "#### Cohere API Key\n\n"
                    "(register your free API key "
                    "at https://dashboard.cohere.com/api-keys)"
                )
            )
            self.cohere_api_key = gr.Textbox(
                show_label=False, placeholder="Cohere API Key"
            )

        with gr.Column(visible=False) as self.google_option:
            gr.Markdown(
                (
                    "#### Google API Key\n\n"
                    "(register your free API key "
                    "at https://aistudio.google.com/app/apikey)"
                )
            )
            self.google_api_key = gr.Textbox(
                show_label=False, placeholder="Google API Key"
            )

        with gr.Column(visible=False) as self.ollama_option:
            gr.Markdown(
                (
                    "#### Setup Ollama\n\n"
                    "Download and install Ollama from "
                    "https://ollama.com/. Check out latest models at "
                    "https://ollama.com/library. "
                )
            )
            self.ollama_model_name = gr.Textbox(
                label="LLM model name",
                value=config("LOCAL_MODEL", default="qwen2.5:7b"),
            )
            self.ollama_emb_model_name = gr.Textbox(
                label="Embedding model name",
                value=config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
            )

        # receives the HTML progress log yielded by `update_model`
        self.setup_log = gr.HTML(
            show_label=False,
        )

        with gr.Row():
            self.btn_finish = gr.Button("Proceed", variant="primary")
            self.btn_skip = gr.Button(
                "I am an advance user. Skip this.", variant="stop"
            )

    def on_register_events(self):
        """Wire the buttons, the API-key textboxes and the provider radio."""
        onFirstSetupComplete = gr.on(
            triggers=[
                self.btn_finish.click,
                self.cohere_api_key.submit,
                self.openai_api_key.submit,
                # pressing Enter in the Google key box behaves like the other
                # API-key boxes
                self.google_api_key.submit,
            ],
            fn=self.update_model,
            inputs=[
                self.cohere_api_key,
                self.openai_api_key,
                self.google_api_key,
                self.ollama_model_name,
                self.ollama_emb_model_name,
                self.radio_model,
            ],
            outputs=[self.setup_log],
            show_progress="hidden",
        )
        # skipping clears the provider selection (the radio is set to None)
        onSkipSetup = gr.on(
            triggers=[self.btn_skip.click],
            fn=lambda: None,
            inputs=[],
            show_progress="hidden",
            outputs=[self.radio_model],
        )

        # subscribers of `onFirstSetupComplete` run after a skip...
        for event in self._app.get_event("onFirstSetupComplete"):
            onSkipSetup = onSkipSetup.success(**event)

        # ...and after a successful setup, once the default settings have been
        # revised for the selected provider
        onFirstSetupComplete = onFirstSetupComplete.success(
            fn=self.update_default_settings,
            inputs=[self.radio_model, self._app.settings_state],
            outputs=self._app.settings_state,
        )
        for event in self._app.get_event("onFirstSetupComplete"):
            onFirstSetupComplete = onFirstSetupComplete.success(**event)

        # NOTE: the order of `outputs` must match the order of `values` in
        # `switch_options_view`
        self.radio_model.change(
            fn=self.switch_options_view,
            inputs=[self.radio_model],
            show_progress="hidden",
            outputs=[
                self.cohere_option,
                self.openai_option,
                self.ollama_option,
                self.google_option,
            ],
        )

    def update_model(
        self,
        cohere_api_key,
        openai_api_key,
        google_api_key,
        ollama_model_name,
        ollama_emb_model_name,
        radio_model_value,
    ):
        """Register the models of the selected provider and test them.

        This is a generator: it yields the accumulated HTML log (shown in
        `self.setup_log`) after every step.

        Args:
            cohere_api_key: content of the Cohere API key textbox
            openai_api_key: content of the OpenAI API key textbox
            google_api_key: content of the Google API key textbox
            ollama_model_name: name of the Ollama LLM model
            ollama_emb_model_name: name of the Ollama embedding model
            radio_model_value: selected provider ("cohere", "openai", "google",
                "ollama") or a falsy value when no provider is selected

        Raises:
            gr.Error: if an Ollama model cannot be downloaded, or if the LLM or
                the embedding model does not return an output.
        """
        log_content = ""
        if not radio_model_value:
            gr.Info("Skip setup models.")
            # nothing to report: hide the log component
            yield gr.update(visible=False)
            return

        # API-based providers are only registered when a key was typed in
        if radio_model_value == "cohere":
            if cohere_api_key:
                llms.update(
                    name="cohere",
                    spec={
                        "__type__": "kotaemon.llms.chats.LCCohereChat",
                        "model_name": "command-r-plus-08-2024",
                        "api_key": cohere_api_key,
                    },
                    default=True,
                )
                embeddings.update(
                    name="cohere",
                    spec={
                        "__type__": "kotaemon.embeddings.LCCohereEmbeddings",
                        "model": "embed-multilingual-v3.0",
                        "cohere_api_key": cohere_api_key,
                        "user_agent": "default",
                    },
                    default=True,
                )
                rerankers.update(
                    name="cohere",
                    spec={
                        "__type__": "kotaemon.rerankings.CohereReranking",
                        "model_name": "rerank-multilingual-v2.0",
                        "cohere_api_key": cohere_api_key,
                    },
                    default=True,
                )
        elif radio_model_value == "openai":
            if openai_api_key:
                llms.update(
                    name="openai",
                    spec={
                        "__type__": "kotaemon.llms.ChatOpenAI",
                        "base_url": "https://api.openai.com/v1",
                        "model": "gpt-4o",
                        "api_key": openai_api_key,
                        "timeout": 20,
                    },
                    default=True,
                )
                embeddings.update(
                    name="openai",
                    spec={
                        "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
                        "base_url": "https://api.openai.com/v1",
                        "model": "text-embedding-3-large",
                        "api_key": openai_api_key,
                        "timeout": 10,
                        "context_length": 8191,
                    },
                    default=True,
                )
        elif radio_model_value == "google":
            if google_api_key:
                llms.update(
                    name="google",
                    spec={
                        "__type__": "kotaemon.llms.chats.LCGeminiChat",
                        "model_name": "gemini-1.5-flash",
                        "api_key": google_api_key,
                    },
                    default=True,
                )
                embeddings.update(
                    name="google",
                    spec={
                        "__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
                        "model": "models/text-embedding-004",
                        "google_api_key": google_api_key,
                    },
                    default=True,
                )
        elif radio_model_value == "ollama":
            # Ollama is reached through its OpenAI-compatible endpoint
            llms.update(
                name="ollama",
                spec={
                    "__type__": "kotaemon.llms.ChatOpenAI",
                    "base_url": KH_OLLAMA_URL,
                    "model": ollama_model_name,
                    "api_key": "ollama",
                },
                default=True,
            )
            embeddings.update(
                name="ollama",
                spec={
                    "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
                    "base_url": KH_OLLAMA_URL,
                    "model": ollama_emb_model_name,
                    "api_key": "ollama",
                },
                default=True,
            )

            # download required models through ollama
            llm_model_name = llms.get("ollama").model  # type: ignore
            emb_model_name = embeddings.get("ollama").model  # type: ignore

            try:
                for model_name in [emb_model_name, llm_model_name]:
                    log_content += f"- Downloading model `{model_name}` from Ollama<br>"
                    yield log_content

                    # progress lines overwrite each other: each one is rendered
                    # on top of the log as it was before the download started
                    pre_download_log = log_content

                    for response in pull_model(model_name):
                        complete = response.get("completed", 0)
                        total = response.get("total", 0)
                        if complete > 0 and total > 0:
                            ratio = int(complete / total * 100)
                            log_content = (
                                pre_download_log
                                + f"- {response.get('status')}: {ratio}%<br>"
                            )
                        else:
                            if "pulling" not in response.get("status", ""):
                                log_content += f"- {response.get('status')}<br>"

                        yield log_content
            except Exception as e:
                log_content += (
                    "Make sure you have download and installed Ollama correctly. "
                    f"Got error: {str(e)}"
                )
                yield log_content
                raise gr.Error("Failed to download model from Ollama.") from e

        # test models connection
        llm_output = emb_output = None

        # LLM model
        log_content += f"- Testing LLM model: {radio_model_value}<br>"
        yield log_content

        llm = llms.get(radio_model_value)  # type: ignore
        log_content += "- Sending a message `Hi`<br>"
        yield log_content
        try:
            llm_output = llm("Hi")
        except Exception as e:
            # reported in the log; the final check below raises the error
            log_content += (
                f"<mark style='color: yellow; background: red'>- Connection failed. "
                f"Got error:\n {str(e)}</mark>"
            )

        if llm_output:
            log_content += (
                "<mark style='background: green; color: white'>- Connection success. "
                "</mark><br>"
            )
        yield log_content

        # the embedding model is only tested once the LLM answered
        if llm_output:
            # embedding model
            log_content += f"- Testing Embedding model: {radio_model_value}<br>"
            yield log_content

            emb = embeddings.get(radio_model_value)
            assert emb, f"Embedding model {radio_model_value} not found."

            log_content += "- Sending a message `Hi`<br>"
            yield log_content
            try:
                emb_output = emb("Hi")
            except Exception as e:
                log_content += (
                    f"<mark style='color: yellow; background: red'>"
                    "- Connection failed. "
                    f"Got error:\n {str(e)}</mark>"
                )

            if emb_output:
                log_content += (
                    "<mark style='background: green; color: white'>"
                    "- Connection success. "
                    "</mark><br>"
                )
            yield log_content

        if llm_output and emb_output:
            gr.Info("Setup models completed successfully!")
        else:
            raise gr.Error(
                "Setup models failed. Please verify your connection and API key."
            )

    def update_default_settings(self, radio_model_value, default_settings):
        """Point the reranking LLM setting to the selected provider.

        Args:
            radio_model_value: selected provider
            default_settings: settings dictionary; it is modified in place

        Returns:
            the same `default_settings` dictionary
        """
        # revise default settings
        # reranking llm
        default_settings["index.options.1.reranking_llm"] = radio_model_value
        if radio_model_value == "ollama":
            # LLM reranking is turned off for the local provider
            default_settings["index.options.1.use_llm_reranking"] = False

        return default_settings

    def switch_options_view(self, radio_model_value):
        """Show the option column of the selected provider and hide the others.

        Args:
            radio_model_value: selected provider, or None when nothing is
                selected (every column is then hidden)

        Returns:
            four `gr.update` objects, in the order cohere, openai, ollama,
            google (the order of `outputs` of the `radio_model.change` event)
        """
        components_visible = [gr.update(visible=False) for _ in range(4)]

        values = ["cohere", "openai", "ollama", "google", None]
        assert radio_model_value in values, f"Invalid value {radio_model_value}"

        if radio_model_value is not None:
            idx = values.index(radio_model_value)
            components_visible[idx] = gr.update(visible=True)

        return components_visible


================================================
FILE: libs/ktem/ktem/reasoning/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/reasoning/base.py
================================================
from typing import Optional

from kotaemon.base import BaseComponent


class BaseReasoning(BaseComponent):
    """The reasoning pipeline that handles each of the user chat messages

    This reasoning pipeline has access to:
        - the retrievers
        - the user settings
        - the message
        - the conversation id
        - the message history

    Subclasses must implement `get_info` and `run`; `get_user_settings` and
    `get_pipeline` have default implementations that can be overridden.
    """

    @classmethod
    def get_info(cls) -> dict:
        """Get the pipeline information for the app to organize and display

        Returns:
            a dictionary that contains the following keys:
                - "id": the unique id of the pipeline
                - "name": the human-friendly name of the pipeline
                - "description": the overview short description of the pipeline, for
                user to grasp what does the pipeline do

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    @classmethod
    def get_user_settings(cls) -> dict:
        """Get the default user settings for this pipeline

        Returns:
            an empty dictionary in this base class
        """
        return {}

    @classmethod
    def get_pipeline(
        cls,
        user_settings: dict,
        state: dict,
        retrievers: Optional[list["BaseComponent"]] = None,
    ) -> "BaseReasoning":
        """Get the reasoning pipeline for the app to execute

        The base implementation ignores all of its arguments and returns
        `cls()`.

        Args:
            user_settings: user settings
            state: conversation state
            retrievers (list): List of retrievers
        """
        return cls()

    def run(self, message: str, conv_id: str, history: list, **kwargs):  # type: ignore
        """Execute the reasoning pipeline

        Args:
            message: the user chat message
            conv_id: the conversation id
            history: the message history

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/__init__.py
================================================
from .decompose_question import DecomposeQuestionPipeline
from .fewshot_rewrite_question import FewshotRewriteQuestionPipeline
from .mindmap import CreateMindmapPipeline
from .rewrite_question import RewriteQuestionPipeline

__all__ = [
    "DecomposeQuestionPipeline",
    "FewshotRewriteQuestionPipeline",
    "RewriteQuestionPipeline",
    "CreateMindmapPipeline",
]


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py
================================================
import logging

from ktem.llms.manager import llms
from ktem.reasoning.prompt_optimization.rewrite_question import RewriteQuestionPipeline
from pydantic import BaseModel, Field

from kotaemon.base import Document, HumanMessage, Node, SystemMessage
from kotaemon.llms import ChatLLM

logger = logging.getLogger(__name__)


# NOTE: the class docstring and the field description below are runtime data,
# not just documentation: `DecomposeQuestionPipeline.create_prompt` reads them
# back from `SubQuery.model_json_schema()` and sends them to the LLM as the
# tool (function) definition. Edit them as you would edit a prompt.
class SubQuery(BaseModel):
    """Search over a database of insurance rulebooks or financial reports"""

    # the single argument of the tool: one decomposed sub-question
    sub_query: str = Field(
        ...,
        description="A very specific query against the database.",
    )


class DecomposeQuestionPipeline(RewriteQuestionPipeline):
    """Decompose user complex question into multiple sub-questions

    The LLM is given `SubQuery` as a tool; every tool call it makes is turned
    into one sub-question.

    Args:
        llm: the language model to rewrite question
        lang: the language of the answer. Currently support English and Japanese
    """

    # prefer the "openai-gpt4-turbo" model, fall back to the default LLM
    llm: ChatLLM = Node(
        default_callback=lambda _: llms.get("openai-gpt4-turbo", llms.get_default())
    )
    DECOMPOSE_SYSTEM_PROMPT_TEMPLATE = (
        "You are an expert at converting user complex questions into sub questions. "
        "Perform query decomposition using provided function_call. "
        "Given a user question, break it down into the most specific sub"
        " questions you can (at most 3) "
        "which will help you answer the original question. "
        "Each sub question should be about a single concept/fact/idea. "
        "If there are acronyms or words you are not familiar with, "
        "do not try to rephrase them."
    )
    prompt_template: str = DECOMPOSE_SYSTEM_PROMPT_TEMPLATE

    def create_prompt(self, question):
        """Build the chat messages and the tool-calling arguments.

        Args:
            question: the user question to decompose

        Returns:
            a tuple `(messages, llm_kwargs)` where `messages` holds the system
            prompt followed by the question, and `llm_kwargs` declares
            `SubQuery` as a function tool.
        """
        schema = SubQuery.model_json_schema()
        function = {
            "name": schema["title"],
            "description": schema["description"],
            "parameters": schema,
        }
        llm_kwargs = {
            "tools": [{"type": "function", "function": function}],
            "tool_choice": "auto",
            "tools_pydantic": [SubQuery],
        }

        messages = [
            SystemMessage(content=self.prompt_template),
            HumanMessage(content=question),
        ]

        return messages, llm_kwargs

    def run(self, question: str) -> list:  # type: ignore
        """Decompose `question` into sub-questions.

        Args:
            question: the user question to decompose

        Returns:
            a list of `Document`, one per tool call made by the LLM; the list
            is empty when the LLM made no tool call.

        Raises:
            pydantic.ValidationError: if the arguments of a tool call do not
                match the `SubQuery` schema.
        """
        messages, llm_kwargs = self.create_prompt(question)
        result = self.llm(messages, **llm_kwargs)
        tool_calls = result.additional_kwargs.get("tool_calls") or []

        sub_queries = []
        for tool_call in tool_calls:
            if "function" in tool_call:
                # openai and cohere format
                function_output = tool_call["function"]["arguments"]
            else:
                # anthropic format
                function_output = tool_call["args"]

            # pydantic v2 API, consistent with `model_json_schema` above
            # (`parse_raw` / `parse_obj` are the deprecated v1 spellings)
            if isinstance(function_output, str):
                # arguments delivered as a JSON string
                parsed = SubQuery.model_validate_json(function_output)
            else:
                # arguments already decoded
                parsed = SubQuery.model_validate(function_output)

            sub_queries.append(Document(content=parsed.sub_query))

        return sub_queries


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
================================================
import json
import uuid
from pathlib import Path

from ktem.components import get_docstore, get_vectorstore
from ktem.llms.manager import llms
from ktem.reasoning.prompt_optimization.rewrite_question import (
    DEFAULT_REWRITE_PROMPT,
    RewriteQuestionPipeline,
)
from theflow.settings import settings as flowsettings

from kotaemon.base import AIMessage, Document, HumanMessage, Node, SystemMessage
from kotaemon.embeddings import BaseEmbeddings
from kotaemon.llms import ChatLLM
from kotaemon.storages import BaseDocumentStore, BaseVectorStore


class FewshotRewriteQuestionPipeline(RewriteQuestionPipeline):
    """Rewrite user question, using similar stored examples as few-shot turns

    Args:
        llm: the language model to rewrite question
        rewrite_template: the prompt template for llm to paraphrase a text input
        lang: the language of the answer. Currently support English and Japanese
        embedding: the embedding model to encode the question
        vector_store: the vector store to store the encoded question
        doc_store: the document store to store the original question
        k: the number of examples to retrieve for rewriting
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    rewrite_template: str = DEFAULT_REWRITE_PROMPT
    lang: str = "English"
    embedding: BaseEmbeddings
    vector_store: BaseVectorStore
    doc_store: BaseDocumentStore
    k: int = getattr(flowsettings, "N_PROMPT_OPT_EXAMPLES", 3)

    def add_documents(self, examples, batch_size: int = 50):
        """Embed the examples and add them to the vector and document stores.

        Args:
            examples: iterable of dictionaries having at least an "input" key;
                each dictionary is kept as the metadata of its document
            batch_size: number of examples embedded and stored per batch
        """
        print("Adding fewshot examples for rewriting")
        documents = [
            Document(text=example["input"], id_=str(uuid.uuid4()), metadata=example)
            for example in examples
        ]

        for start in range(0, len(documents), batch_size):
            batch = documents[start : start + batch_size]
            self.vector_store.add(
                embeddings=self.embedding(batch),
                ids=[doc.doc_id for doc in batch],
            )
            self.doc_store.add(batch)

    @classmethod
    def get_pipeline(
        cls,
        embedding,
        example_path=Path(__file__).parent / "rephrase_question_train.json",
        collection_name: str = "fewshot_rewrite_examples",
    ):
        """Create the pipeline, loading the examples the first time.

        Args:
            embedding: the embedding model to encode the questions
            example_path: path to a JSON file holding the list of examples
            collection_name: name of the vector store / document store
                collection holding the examples

        Returns:
            the pipeline; the example file is only read when the document
            store is still empty.
        """
        vector_store = get_vectorstore(collection_name)
        doc_store = get_docstore(collection_name)

        pipeline = cls(
            embedding=embedding, vector_store=vector_store, doc_store=doc_store
        )
        if doc_store.count():
            # examples were already stored
            return pipeline

        # close the file deterministically and do not depend on the locale
        # encoding to decode the JSON
        with open(example_path, "r", encoding="utf-8") as example_file:
            examples = json.load(example_file)
        pipeline.add_documents(examples)

        return pipeline

    def run(self, question: str) -> Document:  # type: ignore
        """Rewrite `question`, with the `k` nearest stored examples as few-shot.

        Args:
            question: the user question to rewrite

        Returns:
            the output of the LLM
        """
        emb = self.embedding(question)[0].embedding
        _, _, ids = self.vector_store.query(embedding=emb, top_k=self.k)
        examples = self.doc_store.get(ids)

        messages = [SystemMessage(content="You are a helpful assistant")]
        # each example becomes one (human, AI) turn
        for example in examples:
            messages.append(
                HumanMessage(
                    content=self.rewrite_template.format(
                        question=example.metadata["input"], lang=self.lang
                    )
                )
            )
            messages.append(AIMessage(content=example.metadata["output"]))
        # the question to rewrite comes last
        messages.append(
            HumanMessage(
                content=self.rewrite_template.format(question=question, lang=self.lang)
            )
        )

        result = self.llm(messages)
        return result


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/mindmap.py
================================================
import logging
from textwrap import dedent

from ktem.llms.manager import llms

from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage
from kotaemon.llms import ChatLLM, PromptTemplate

logger = logging.getLogger(__name__)


# Standalone HTML page used to export a mindmap: it loads the
# `markmap-autoloader` script from the jsdelivr CDN and expects the
# `{markmap_div}` placeholder in <body> to be substituted with the mindmap
# markup.
# NOTE(review): the <style> section contains literal `{` / `}`, so filling the
# placeholder with `str.format` would fail; presumably callers use
# `str.replace("{markmap_div}", ...)` - confirm at the call site.
MINDMAP_HTML_EXPORT_TEMPLATE = dedent(
    """
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Mindmap</title>
    <style>
      svg.markmap {
        width: 100%;
        height: 100vh;
      }
    </style>
    <script src="https://cdn.jsdelivr.net/npm/markmap-autoloader@0.16"></script>
  </head>
  <body>
    {markmap_div}
  </body>
</html>
"""
)


class CreateMindmapPipeline(BaseComponent):
    """Create a mindmap from the question and context

    The LLM is asked for a PlantUML mindmap, which is then converted into
    markdown headings.
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())

    SYSTEM_PROMPT = """
From now on you will behave as "MapGPT" and, for every text the user will submit, you are going to create a PlantUML mind map file for the inputted text to best describe main ideas. Format it as a code and remember that the mind map should be in the same language as the inputted context. You don't have to provide a general example for the mind map format before the user inputs the text.
    """  # noqa: E501
    MINDMAP_PROMPT_TEMPLATE = """
Question:
{question}

Context:
{context}

Generate a sample PlantUML mindmap for based on the provided question and context above. Only includes context relevant to the question to produce the mindmap.

Use the template like this:

@startmindmap
* Title
** Item A
*** Item B
**** Item C
*** Item D
@endmindmap
    """  # noqa: E501
    prompt_template: str = MINDMAP_PROMPT_TEMPLATE

    @classmethod
    def convert_uml_to_markdown(cls, text: str) -> str:
        """Turn a PlantUML mindmap into markdown headings.

        Keeps what lies between the last `@startmindmap` and the first
        `@endmindmap` that follows it (a missing marker leaves that side
        untrimmed), strips the surrounding whitespace and turns every `*`
        into `#`.

        Args:
            text: the raw LLM answer

        Returns:
            the markdown text
        """
        # a marker that is absent leaves the text unchanged on that side
        after_start = text.rpartition("@startmindmap")[2]
        mindmap_body = after_start.partition("@endmindmap")[0]
        # NOTE: every asterisk is converted, not only the leading node markers
        return mindmap_body.strip().replace("*", "#")

    def run(self, question: str, context: str) -> Document:  # type: ignore
        """Generate the markdown mindmap for `question` given `context`.

        Args:
            question: the user question
            context: the context to build the mindmap from

        Returns:
            a Document whose text is the markdown mindmap
        """
        user_prompt = PromptTemplate(self.prompt_template).populate(
            question=question,
            context=context,
        )
        conversation = [
            SystemMessage(content=self.SYSTEM_PROMPT),
            HumanMessage(content=user_prompt),
        ]

        llm_answer = self.llm(conversation)
        return Document(
            text=self.convert_uml_to_markdown(llm_answer.text),
        )


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json
================================================
[
  {
    "input": "What was the percentage increase in General and Administrative expenses in the year 2018 compared to the previous year?",
    "output": "What was the increase in the General and administrative in 2018?"
  },
  {
    "input": "What was the specific percentage increase in the General and Administrative expenses during the year 2018?",
    "output": "What was the increase in the General and administrative in 2018?"
  },
  {
    "input": "What was the decrease in the Other net expense in 2019?",
    "output": "What was the specific amount of the decrease in the Other net expense in the financial year of 2019?"
  },
  {
    "input": "What was the decrease in the Other net expense in 2019?",
    "output": "What was the specific decrease amount experienced in the Other net expense during the year 2019?"
  },
  {
    "input": "What was the increase in interest income?",
    "output": "What was the exact amount by which the interest income increased?"
  },
  {
    "input": "What was the increase in interest income?",
    "output": "What was the exact amount of the increase in interest income?"
  },
  {
    "input": "What was the average Other expense, net for 2018 and 2019?",
    "output": "What was the average net Other expense for the years 2018 and 2019 combined?"
  },
  {
    "input": "In which year was Other expense, net less than 5,000 thousands?",
    "output": "In what particular year did the amount of Other expense, net fall below 5,000 thousands?"
  },
  {
    "input": "What were the income tax expenses in the years 2019, 2018, and 2017, respectively?",
    "output": "What was the income tax expense in 2019,2018 and 2017 respectively?"
  },
  {
    "input": "\"What were the income tax expenses for the years 2019, 2018, and 2017?\"",
    "output": "What was the income tax expense in 2019,2018 and 2017 respectively?"
  },
  {
    "input": "How much money did the company have in unremitted earnings for the year 2019?",
    "output": "What was the company's unremitted earnings in 2019?"
  },
  {
    "input": "How much unremitted earnings did the company have in 2019, without releasing or distributing them?",
    "output": "What was the company's unremitted earnings in 2019?"
  },
  {
    "input": "Which year witnessed interest and penalties that amounted to less than 20 thousand?",
    "output": "In which year was Interest and penalties less than 20 thousand?"
  },
  {
    "input": "What is the net carrying amount in 2019?",
    "output": "What is the net carrying amount for the year 2019? Please provide details to assist in providing a more accurate response."
  },
  {
    "input": "What is the net carrying amount in 2019?",
    "output": "What is the net carrying amount for the year 2019? Can you provide more details or context about the specific asset, liability or financial element in consideration?"
  },
  {
    "input": "What is the change in the debt discount, net of amortization from December 31, 2019 to December 31, 2018?",
    "output": "What was the net change in the debt discount's value, after taking into account amortization, between December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in Net carrying amount from December 31, 2019 to December 31, 2018?",
    "output": "What is the difference in the net carrying amount between December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?",
    "output": "What is the net difference in the value of furniture and equipment between December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?",
    "output": "What was the net change in the value of Furniture and equipment between December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in System hardware from December 31, 2019 to December 31, 2018?",
    "output": "What are the differences in the hardware of the System between December 31, 2019, and December 31, 2018?"
  },
  {
    "input": "What was the exact percentage increase in the cost of revenue between 2017 and 2018?",
    "output": "What was the increase in the cost of revenue in 2018?"
  },
  {
    "input": "What was the percentage increase in the total amount spent on expenses directly related to generating revenue in 2018 compared to the previous year?",
    "output": "What was the increase in the cost of revenue in 2018?"
  },
  {
    "input": "In which year was cost of revenue less than 40,000 thousands?",
    "output": "In what specific year did the cost of revenue fall below the amount of 40,000 thousands?"
  },
  {
    "input": "If a company used its earnings to finance its domestic operations, what are the potential outcomes or consequences of this decision?",
    "output": "What would happen if earnings were used to fund domestic operations?"
  },
  {
    "input": "What was the change in Cash, cash equivalents, and restricted cash at the beginning of the period, comparing December 31, 2019, to December 31, 2018?",
    "output": "What is the change in Cash, cash equivalents and restricted cash at beginning of period from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What is the net difference in Cash provided by operating activities between December 31, 2019 and December 31, 2018?",
    "output": "What is the change in Cash provided by operating activities from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What is the change in Cash used in investing activities from December 31, 2019 to December 31, 2018?",
    "output": "What is the difference in the amount of Cash used in investing activities between December 31, 2019, and December 31, 2018?"
  },
  {
    "input": "What was the percentage change in the Sales and marketing expense from 2017 to 2018?",
    "output": "What was the increase in the Sales and marketing expense in 2018 compared to 2017?"
  },
  {
    "input": "In which year was Sales and marketing expenses less than 50,000 thousands?",
    "output": "In what specific year did the total expenses for sales and marketing fall below 50,000 thousands (50 million) currency units?"
  },
  {
    "input": "In which year was General and administrative expenses less than 50,000 thousands?",
    "output": "In which specific year did the amount for General and Administrative expenses fall below 50,000 thousands (50 million) dollars?"
  },
  {
    "input": "What is the average Carrying Value for the period December 31, 2019 to December 31, 2018?",
    "output": "What is the average Carrying Value from December 31, 2018 to December 31, 2019?"
  },
  {
    "input": "In which specific year did the fair value of the 2022 Notes measure below 200,000 thousands?",
    "output": "In which year was the 2022 Notes fair value less than 200,000 thousands?"
  },
  {
    "input": "In the year when the fair value of the 2022 Notes was below 200,000 thousands, what specific year was it?",
    "output": "In which year was the 2022 Notes fair value less than 200,000 thousands?"
  },
  {
    "input": "What was the 2022 Notes cap price?",
    "output": "\"What was the cap price for the 2022 Notes? Please provide any additional details that could assist in retrieving the information accurately.\""
  },
  {
    "input": "What were the average expenses for sales and marketing in the years 2018 and 2019?",
    "output": "What was the average Sales and marketing expenses for 2018 and 2019?"
  },
  {
    "input": "What were the average expenses for Sales and Marketing in 2018 and 2019, respectively?",
    "output": "What was the average Sales and marketing expenses for 2018 and 2019?"
  },
  {
    "input": "In which year was Sales and marketing expenses less than 90,000 thousands?",
    "output": "When was the year that the sales and marketing expenses were recorded to be under 90,000 thousands?"
  },
  {
    "input": "In which year was Sales and marketing expenses less than 90,000 thousands?",
    "output": "In which specific year did the total expenses for sales and marketing amount to a figure less than 90,000 thousands?"
  },
  {
    "input": "What was the exact amount of gross profit recorded in the fiscal year of 2018?",
    "output": "What was the Gross profit in 2018?"
  },
  {
    "input": "What was the specific value of the gross profit for the year 2018?",
    "output": "What was the Gross profit in 2018?"
  },
  {
    "input": "In what specific year did the revenue fall below one hundred thousand thousands (100,000,000)?",
    "output": "In which year was revenue less than 100,000 thousands?"
  },
  {
    "input": "\"In which specific year did the revenue fall below 100,000 thousands (100 million)?\"",
    "output": "In which year was revenue less than 100,000 thousands?"
  },
  {
    "input": "What is the difference in the Gross Profit between the fiscal years 2018 and 2019?",
    "output": "What is the change in the Gross Profit from 2018 to 2019?"
  },
  {
    "input": "What were the capitalized software development costs for the company in the years ending on December 31, 2017?",
    "output": "What was the company capitalized software development costs during the years ended December 31, 2017?"
  },
  {
    "input": "What does the Adjusted EBITDA represent?",
    "output": "What is the meaning and purpose of the term Adjusted EBITDA?"
  },
  {
    "input": "What does the Adjusted EBITDA represent?",
    "output": "What is the meaning and significance of Adjusted EBITDA?"
  },
  {
    "input": "\"What were the federal statutory income tax rates in 2019, 2018, and 2017?\"",
    "output": "What was the Income tax at federal statutory rate in 2019, 2018 and 2017?"
  },
  {
    "input": "What were the federal statutory income tax rates in 2019, 2018, and 2017?",
    "output": "What was the Income tax at federal statutory rate in 2019, 2018 and 2017?"
  },
  {
    "input": "In what year did the State income tax expense, taking into account the federal tax effect, fall below negative one thousand thousands?",
    "output": "In which year was State income tax expense, net of federal tax effect less than (1,000) thousands?"
  },
  {
    "input": "What were the amounts of the Other expense, net in 2018 and 2017?",
    "output": "What was the Other expense, net in 2018 and 2017 respectively?"
  },
  {
    "input": "What is the difference in the amount of money owed from customers (net accounts receivable) between December 31, 2019, and December 31, 2018?",
    "output": "What is the change in Net accounts receivable from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What are the respective federal income tax expense at statutory rates in 2018 and 2019?",
    "output": "What were the federal income tax expenses at statutory rates for the years 2018 and 2019?"
  },
  {
    "input": "What were the specific state income tax rates, after accounting for federal benefit deductions, for the years 2017 and 2018?",
    "output": "What are the respective state income taxes, net of federal benefit in 2017 and 2018?"
  },
  {
    "input": "What were the state income tax rates, after accounting for federal benefit, for both the years 2017 and 2018?",
    "output": "What are the respective state income taxes, net of federal benefit in 2017 and 2018?"
  },
  {
    "input": "What are the respective service revenue at March 31 and June 30, 2018?",
    "output": "What were the service revenues as of March 31 and June 30, 2018?"
  },
  {
    "input": "What are the respective service revenue at June 30 and September 30, 2018?",
    "output": "What is the service revenue as of June 30 and September 30, 2018?"
  },
  {
    "input": "What are the respective service revenue at June 30 and September 30, 2018?",
    "output": "What is the amount of service revenue reported on the financial statements for June 30 and September 30, 2018?"
  },
  {
    "input": "What percentage of the total revenue generated in North America in 2019 can be attributed to the non-core segment?",
    "output": "What is the value of the revenue from the non-core segment from North America as a percentage of the total revenue earned in North America in 2019?"
  },
  {
    "input": "What is the average amount of net cash generated from operating activities for the years 2018 and 2019?",
    "output": "What is the average net cash provided by operating activities in 2018 and 2019?"
  },
  {
    "input": "What is the amount of cash-based equity compensation expense included in network operations expenses in both 2018 and 2017?",
    "output": "What are the respective non-cash equity-based compensation expense included in network operations expenses in 2018 and 2017?"
  },
  {
    "input": "What are the respective service revenue in 2018 and 2019?",
    "output": "What was the amount of service revenue in the year 2018 and 2019 respectively?"
  },
  {
    "input": "What is the total amount of interest and principal payments required for the debt relating to the 2022 Notes?",
    "output": "What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?"
  },
  {
    "input": "What is the total amount of both interest and principal that needs to be paid for the debt included in the 2022 Notes?",
    "output": "What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?"
  },
  {
    "input": "What is the total amount of debt that the company needs to repay within the next 3 years?",
    "output": "What is the company's total debt due within 3 years?"
  },
  {
    "input": "What is the aggregate amount of debt that the company needs to repay within the next three years?",
    "output": "What is the company's total debt due within 3 years?"
  },
  {
    "input": "What is the total amount of finance lease obligations that the company needs to repay within the next 3 years?",
    "output": "What is the company's total finance lease obligations due within 3 years?"
  },
  {
    "input": "What were the specific values of system infrastructure in the years 2018 and 2019?",
    "output": "What are the respective values of system infrastructure in 2018 and 2019?"
  },
  {
    "input": "What were the values of system infrastructure in 2018 and 2019?",
    "output": "What are the respective values of system infrastructure in 2018 and 2019?"
  },
  {
    "input": "What is the percentage increase or decrease in the value of network equipment from 2018 to 2019?",
    "output": "What is the percentage change in the value of network equipment between 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in the value of leasehold improvements between 2018 and 2019?",
    "output": "What is the precise percentage increase or decrease in the value of leasehold improvements from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in the value of system infrastructure between 2018 and 2019?",
    "output": "What is the percentage difference in the value of system infrastructure from 2018 to 2019?"
  },
  {
    "input": "What were the pre-tax domestic incomes for 2018 and 2019?",
    "output": "What are the respective domestic income before income taxes in 2018 and 2019?"
  },
  {
    "input": "What was the mean pre-tax domestic income for individuals in the years 2017 and 2018?",
    "output": "What is the average domestic income before income taxes in 2017 and 2018?"
  },
  {
    "input": "What were the average pre-tax domestic incomes in the years 2017 and 2018?",
    "output": "What is the average domestic income before income taxes in 2017 and 2018?"
  },
  {
    "input": "What was the average pre-tax domestic income for households in 2018 and 2019?",
    "output": "What is the average domestic income before income taxes in 2018 and 2019?"
  },
  {
    "input": "What is the average number of basic weighted average common shares in 2018 and 2019?",
    "output": "What is the mean value of the basic weighted average common shares for the years 2018 and 2019?"
  },
  {
    "input": "What is the average number of basic weighted average common shares in 2018 and 2019?",
    "output": "What is the average number of basic weighted average common shares for the years 2018 and 2019 combined?"
  },
  {
    "input": "What was the average impact on stock dilution from stock options in 2018 and 2019?",
    "output": "What is the average dilutive effect of stock options in 2018 and 2019?"
  },
  {
    "input": "What was the average impact of stock options on dilution for the years 2018 and 2019?",
    "output": "What is the average dilutive effect of stock options in 2018 and 2019?"
  },
  {
    "input": "What does the cost of operations represent?",
    "output": "What is the meaning or significance of the term \"cost of operations\"?"
  },
  {
    "input": "How many quarters had operating revenues that was below $2,000 million?",
    "output": "How many quarters had operating revenues below $2,000 million? Can you provide a breakdown of the number of quarters where the operating revenues were below $2,000 million?"
  },
  {
    "input": "What was the exact change in Operating revenues from the Three Months Ended March to the Three Months Ended June?",
    "output": "What was the change in Operating revenues between Three Months Ended March and June?"
  },
  {
    "input": "What was the difference in operating revenues for the Three Months Ended March and the Three Months Ended June?",
    "output": "What was the change in Operating revenues between Three Months Ended March and June?"
  },
  {
    "input": "What was the amount of net property and equipment in 2015?",
    "output": "What was the exact monetary value of net property and equipment in the year 2015?"
  },
  {
    "input": "What was the amount of net property and equipment in 2015?",
    "output": "What was the specific amount of net property and equipment recorded on the financial statements for the year 2015?"
  },
  {
    "input": "What was the value of redeemable noncontrolling interests in the year 2019?",
    "output": "What were the Redeemable noncontrolling interests in 2019?"
  },
  {
    "input": "What was the value of the redeemable noncontrolling interests in 2019?",
    "output": "What were the Redeemable noncontrolling interests in 2019?"
  },
  {
    "input": "What was the percent increase or decrease in the equity of American Tower Corporation from 2015 to 2016 in the United States?",
    "output": "What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?"
  },
  {
    "input": "What was the percentage increase or decrease in the equity of American Tower Corporation from 2015 to 2016?",
    "output": "What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?"
  },
  {
    "input": "What was the difference in the overall value of assets from 2017 to 2018?",
    "output": "What was the change in total assets between 2017 and 2018?"
  },
  {
    "input": "What was the total amount received from the sales of senior notes in the year 2019 after deducting any expenses or fees?",
    "output": "What were the net Proceeds from issuance of senior notes in 2019?"
  },
  {
    "input": "What was the total amount of money received from the repayment of credit facilities in 2018?",
    "output": "What were the net Proceeds from (repayments of) credit facilities in 2018?"
  },
  {
    "input": "\"What was the total amount received or repaid through credit facilities in 2018?\"",
    "output": "What were the net Proceeds from (repayments of) credit facilities in 2018?"
  },
  {
    "input": "What was the value of Equipment in 2018?",
    "output": "What was the total monetary worth or market value of Equipment in the year 2018?"
  },
  {
    "input": "What was the value of Equipment in 2018?",
    "output": "What was the monetary worth of the Equipment in the year 2018?"
  },
  {
    "input": "What was the exact monetary worth of buildings and any related improvements in the year 2019?",
    "output": "What was the value of buildings and improvements in 2019?"
  },
  {
    "input": "What was the difference in the value of land and improvements from 2018 to 2019?",
    "output": "What was the change in Land and improvements between 2018 and 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the net value of Property and equipment between the years 2018 and 2019?",
    "output": "What was the percentage change in Property and equipment, net between 2018 and 2019?"
  },
  {
    "input": "What are the components that make up redevelopment capital expenditures?",
    "output": "What does Redevelopment capital expenditures include?"
  },
  {
    "input": "What actions are authorized by the company's Buyback Programs?",
    "output": "What do the company's Buyback Programs authorize the company to do?"
  },
  {
    "input": "What was the change in the total number of shares purchased between November and December?",
    "output": "How much did the total number of shares purchased increase or decrease from November to December?"
  },
  {
    "input": "What was the total value of the Property, Plant, and Equipment (PPE) in 2016?",
    "output": "What was the gross PPE in 2016?"
  },
  {
    "input": "What was the total value of personal protective equipment (PPE) in terms of sales or revenue for the year 2016?",
    "output": "What was the gross PPE in 2016?"
  },
  {
    "input": "For how many years has the total value of gross intangibles been above $15,000 million?",
    "output": "How many years did gross intangibles exceed $15,000 million?"
  },
  {
    "input": "How many consecutive years did the value of gross intangibles surpass $15,000 million?",
    "output": "How many years did gross intangibles exceed $15,000 million?"
  },
  {
    "input": "What was the percentage change in gross goodwill between 2018 and 2019?",
    "output": "What was the percentage difference in the amount of gross goodwill from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in gross goodwill between 2018 and 2019?",
    "output": "What was the percentage difference in gross goodwill from 2018 to 2019?"
  },
  {
    "input": "\"What was the overall amount paid in lease payments for the operating lease agreement?\"",
    "output": "What were the total lease payments for operating lease?"
  },
  {
    "input": "What was the sum total of all lease payments made for an operating lease?",
    "output": "What were the total lease payments for operating lease?"
  },
  {
    "input": "\"What was the total amount of operating lease expenses incurred during the fiscal years 2020, 2021, and 2022?\"",
    "output": "What was the sum of operating lease in fiscal years 2020-2022?"
  },
  {
    "input": "What is the proportion of non-current lease liability in relation to the total lease liability, expressed as a percentage?",
    "output": "What is non-current lease liability as a percentage of Total lease liability?"
  },
  {
    "input": "Which years did the company allocate a valuation allowance?",
    "output": "In which years did the company provide a valuation allowance?"
  },
  {
    "input": "For how many years, starting from January 1, was the balance consistently higher than $100 million?",
    "output": "How many years was the balance as of January 1 above $100 million?"
  },
  {
    "input": "How many years, starting from January 1, had a balance above $100 million?",
    "output": "How many years was the balance as of January 1 above $100 million?"
  },
  {
    "input": "What specific details about the company's taxes can be found in their state tax returns?",
    "output": "What do the company's state tax returns reflect?"
  },
  {
    "input": "In 2017, how many instances were there where the deferred Income tax benefit (provision) exceeded $50 million?",
    "output": "How many of the deferred Income tax benefit (provision) were above $50 million in 2017?"
  },
  {
    "input": "How many instances occurred in 2017 where the deferred income tax benefit (provision) exceeded $50 million?",
    "output": "How many of the deferred Income tax benefit (provision) were above $50 million in 2017?"
  },
  {
    "input": "In 2019, what is the number of income tax benefits (provisions) that exceeded $(4 million)?",
    "output": "How many of the current Income tax benefit (provision) were above $(4 million) in 2019?"
  },
  {
    "input": "In 2019, what is the total number of income tax benefits (provisions) that exceeded $(4 million)?",
    "output": "How many of the current Income tax benefit (provision) were above $(4 million) in 2019?"
  },
  {
    "input": "What was the change in the net cash provided by (used for) by investing activities between 2017 and 2018?",
    "output": "What was the difference in the net cash provided by (used for) investing activities from 2017 to 2018?"
  },
  {
    "input": "What was the percentage change in diluted net income per share attributable to common stockholders of American Tower Corporation between the Three Months Ended in March and June?",
    "output": "What was the change in Diluted net income per share attributable to American Tower Corporation common stockholders between Three Months Ended  March and June?"
  },
  {
    "input": "What was the specific difference in the value of Prepaids and other current assets between the fiscal years 2018 and 2019?",
    "output": "What was the change in Prepaids and other current assets between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the amount of Prepaids and other current assets reported in the financial statements between the years 2018 and 2019?",
    "output": "What was the change in Prepaids and other current assets between 2018 and 2019?"
  },
  {
    "input": "What was the total amount of property and real estate taxes that were accumulated or owed in 2018?",
    "output": "What was the Accrued property and real estate taxes in 2018?"
  },
  {
    "input": "What was the change in Other accrued expenses between 2018 and 2019?",
    "output": "What was the difference in the amount of Other accrued expenses recorded in 2018 compared to 2019?"
  },
  {
    "input": "What was the change in Other accrued expenses between 2018 and 2019?",
    "output": "What was the difference in the amount of Other accrued expenses recorded between the years 2018 and 2019?"
  },
  {
    "input": "What was the total amount of Net operating loss carryforwards recorded for the financial year 2019?",
    "output": "What were the Net operating loss carryforwards in 2019?"
  },
  {
    "input": "What were the amounts of the liability decreases included in the company's revisions in estimates for both 2019 and 2018?",
    "output": "How much were the decreases to the liability included in the revisions in estimates by the company in 2019 and 2018 respectively?"
  },
  {
    "input": "What was the change in balance as of December 31 between 2018 and 2019?",
    "output": "What was the difference in the balance as of December 31st, 2018 and December 31st, 2019?"
  },
  {
    "input": "What was the value of the Eure-et-Loir interests as of December 31, 2019?",
    "output": "What was the monetary worth of the Eure-et-Loir interests on the specific date of December 31, 2019?"
  },
  {
    "input": "How many of the non-current liabilities components in 2018 were above $500 million?",
    "output": "How many components of non-current liabilities in the year 2018 exceeded $500 million in value?"
  },
  {
    "input": "How many of the non-current liabilities components in 2018 were above $500 million?",
    "output": "In 2018, how many components of non-current liabilities exceeded $500 million in value?"
  },
  {
    "input": "How many years has the expense for stock-based compensation for Total exceeded $100 million?",
    "output": "How many years did Total stock-based compensation expense exceed $100 million?"
  },
  {
    "input": "For how many years has the expense for Stock-based compensation - Services been higher than $1 million?",
    "output": "How many years did Stock-based compensation expense - Services exceed $1 million?"
  },
  {
    "input": "What differentiates the characteristics of Federal and State NOLs from 2025 to 2029?",
    "output": "What is the difference between Federal and State NOLs in the period 2025 to 2029?"
  },
  {
    "input": "What differentiates Federal and State NOLs in the 2025-2029 timeframe, and how do they vary during this period?",
    "output": "What is the difference between Federal and State NOLs in the period 2025 to 2029?"
  },
  {
    "input": "What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?",
    "output": "What was the estimated percentage of the company's total revenue in 2018 that came from their property segments, specifically from their communication sites?"
  },
  {
    "input": "What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?",
    "output": "What percentage of the company's total revenue in 2018 can be attributed to their communication sites within their property segments?"
  },
  {
    "input": "What percentage of the total revenue in the United States in 2019 can be attributed to property segments?",
    "output": "How many percent of total revenue in 2019 was accounted for by property segments in the U.S.?"
  },
  {
    "input": "How many percent of total revenue in 2018 was accounted for by property segments in Asia?",
    "output": "What was the percentage of the total revenue in 2018 contributed by the property segments specifically in Asia?"
  },
  {
    "input": "What were the intangible assets related to the Tower and network location specifically in the year 2019?",
    "output": "What were the Tower and network location intangible assets in 2019?"
  },
  {
    "input": "What were the intangible assets related to the tower and network locations observed in 2019?",
    "output": "What were the Tower and network location intangible assets in 2019?"
  },
  {
    "input": "How many years were the total impairment charges above $200 million?",
    "output": "How many years did the total impairment charges exceed or surpass $200 million?"
  },
  {
    "input": "How many years were the total impairment charges above $200 million?",
    "output": "For how many years did the total impairment charges exceed $200 million?"
  },
  {
    "input": "What was the percentage change in Total impairment charges between 2018 and 2019?",
    "output": "What percentage represents the difference in Total impairment charges between the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in Total impairment charges between 2018 and 2019?",
    "output": "What is the percentage difference in Total impairment charges from 2018 to 2019?"
  },
  {
    "input": "For how long has the sum of all assets reached or surpassed $40,000 million?",
    "output": "How many years did the total assets exceed $40,000 million?"
  },
  {
    "input": "What was the balance as at January 1, 2019?",
    "output": "What was the amount of money in the account on January 1, 2019?"
  },
  {
    "input": "What was the difference in account balance on January 1st, comparing 2018 to 2019?",
    "output": "What was the change in balance as of January 1 between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the account balance between January 1, 2018 and January 1, 2019?",
    "output": "What was the change in balance as of January 1 between 2018 and 2019?"
  },
  {
    "input": "What is the difference in the percentage increase of current year for 2017 and 2018?",
    "output": "What was the change in current year increases between 2017 and 2018?"
  },
  {
    "input": "What was the difference in the yearly increases of current year between 2017 and 2018?",
    "output": "What was the change in current year increases between 2017 and 2018?"
  },
  {
    "input": "What was the total amount spent on depreciation in the year 2019?",
    "output": "What was the depreciation cost in 2019?"
  },
  {
    "input": "What was the change in depreciation between 2018 and 2019?",
    "output": "What was the difference in the amount of depreciation incurred in 2018 compared to 2019?"
  },
  {
    "input": "What was the gross amount of accumulated depreciation at the beginning of 2017 and how did it change by the beginning of 2018?",
    "output": "What was the change in Gross amount of accumulated depreciation at beginning between 2017 and 2018?"
  },
  {
    "input": "Which specific years have the statute of limitations expired on certain unrecognized tax benefits?",
    "output": "Which years did the statute of limitations on certain unrecognized tax benefits lapse?"
  },
  {
    "input": "In 2019, what was the total value of receivables that had not yet been invoiced to customers?",
    "output": "What were the unbilled receivables in 2019?"
  },
  {
    "input": "What was the change in Unbilled receivables between 2018 and 2019?",
    "output": "What was the net difference in Unbilled receivables from 2018 to 2019?"
  },
  {
    "input": "What was the change in the investments into Nippon Yttrium Co., Ltd (\"NYC\") between 2018 and 2019?",
    "output": "\"What was the difference in the amount of money invested in Nippon Yttrium Co., Ltd (\"NYC\") from 2018 to 2019?\""
  },
  {
    "input": "What was the change in the investments into Nippon Yttrium Co., Ltd (\"NYC\") between 2018 and 2019?",
    "output": "What was the difference in the amount of investments made into Nippon Yttrium Co., Ltd (\"NYC\") during the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in total investments between 2018 and 2019?",
    "output": "What was the exact percentage difference in the total amount of investments from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in Pro forma revenues from 2017 to 2018?",
    "output": "What was the change in Pro forma revenues between 2017 and 2018?"
  },
  {
    "input": "How many years did net income exceed $100,000 thousand?",
    "output": "For how many consecutive years has the net income surpassed $100,000 thousand?"
  },
  {
    "input": "How many years did net income exceed $100,000 thousand?",
    "output": "For how many consecutive years has the net income been greater than $100,000 thousand?"
  },
  {
    "input": "What was the specific financial difference in adjustments made to the defined benefit post-retirement plan between the years 2017 and 2019?",
    "output": "What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?"
  },
  {
    "input": "What was the difference in the adjustments made to Defined Benefit post-retirement plans between the years 2017 and 2019?",
    "output": "What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?"
  },
  {
    "input": "What were the total sales revenues in Hong Kong in the year 2018?",
    "output": "What was the net sales in Hong Kong in 2018?"
  },
  {
    "input": "What was the exact amount of net sales generated in Singapore during the year 2017?",
    "output": "What was the net sales in Singapore in 2017?"
  },
  {
    "input": "What was the change in net sales in Mexico between 2017 and 2018?",
    "output": "What was the difference in net sales in Mexico from 2017 to 2018 and how does it impact the overall sales performance in the country?"
  },
  {
    "input": "In which specific years did the cumulative net sales in every region surpass one million dollars?",
    "output": "Which years did the total net sales in all regions exceed $1,000,000 thousand?"
  },
  {
    "input": "What was the percentage change in the net sales from Other Countries between 2018 and 2019?",
    "output": "What was the percentage difference in net sales from Other Countries in 2019 compared to 2018?"
  },
  {
    "input": "What was the difference in the amount of Amortization of debt issuance costs between the years 2017 and 2018?",
    "output": "What was the change in the Amortization of debt issuance costs between 2017 and 2018?"
  },
  {
    "input": "For how many years did the interest expense on capital leases surpass $200 thousand?",
    "output": "How many years did Interest expense on capital leases exceed $200 thousand?"
  },
  {
    "input": "By what percentage did the total interest expense increase or decrease from 2018 to 2019?",
    "output": "What was the percentage change in the total interest expense between 2018 and 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the total interest expense from 2018 to 2019?",
    "output": "What was the percentage change in the total interest expense between 2018 and 2019?"
  },
  {
    "input": "What was the total net actuarial loss or gain specifically associated with Other Benefits for the year 2018?",
    "output": "What was the Net actuarial loss (gain) for Other Benefits in 2018?"
  },
  {
    "input": "What was the change in net sales between 2017 and 2019?",
    "output": "What is the difference in net sales from 2017 to 2019?"
  },
  {
    "input": "For how many years was the cost of sales higher than $800,000 thousand?",
    "output": "How many years did cost of sales exceed $800,000 thousand?"
  },
  {
    "input": "For how many years has the cost of sales been higher than $800,000 thousand?",
    "output": "How many years did cost of sales exceed $800,000 thousand?"
  },
  {
    "input": "What was the precise net value of property, plant, and equipment in Japan specifically for the year 2019?",
    "output": "What was the net amount of property, plant and equipment in Japan in 2019?"
  },
  {
    "input": "What was the net amount of property, plant and equipment in Thailand in 2018?",
    "output": "What was the exact net value of property, plant, and equipment in Thailand during the year 2018?"
  },
  {
    "input": "How many years did Total net property, plant and equipment from Non-United States regions exceed $400,000 thousand?",
    "output": "How many years was the total value of property, plant, and equipment from regions outside the United States greater than $400,000 thousand?"
  },
  {
    "input": "What was the change in the net property, plant and equipment in China between 2018 and 2019?",
    "output": "What was the exact difference in the net value of property, plant, and equipment in China during the period from 2018 to 2019?"
  },
  {
    "input": "What was the net interest expense in 2019?",
    "output": "What was the amount of interest expenses incurred in 2019, after subtracting any interest income earned during the same year?"
  },
  {
    "input": "What was the change in the Loss on early extinguishment of debt between 2018 and 2019?",
    "output": "What was the difference in the Loss on early extinguishment of debt from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the net interest expense from 2018 to 2019?",
    "output": "What was the percentage change in the net interest expense between 2018 and 2019?"
  },
  {
    "input": "What percentage increase or decrease occurred in the net interest expense from 2018 to 2019?",
    "output": "What was the percentage change in the net interest expense between 2018 and 2019?"
  },
  {
    "input": "What was the change in the Returns reserves between 2018 and 2019?",
    "output": "What was the difference in the amount allocated to Returns reserves between the fiscal years 2018 and 2019?"
  },
  {
    "input": "How much did the Additions for tax positions change from 2018 to 2019 in the current year?",
    "output": "What was the change in the Additions for tax positions of the current year between 2018 and 2019?"
  },
  {
    "input": "What was the reflection of Pension and other post-retirement benefits?",
    "output": "What did Pension and other post-retirement benefits reflect?"
  },
  {
    "input": "What were the interest obligations that had payments due more than 5 years?",
    "output": "What were the interest obligations that had payments due for a period longer than 5 years?"
  },
  {
    "input": "What was the precise difference in the total amount between Employee separation liability and Restructuring liability?",
    "output": "What was the difference in the total between Employee separation liability and Restructuring liability?"
  },
  {
    "input": "What is the discrepancy in the total amount between Employee Separation Liability and Restructuring Liability?",
    "output": "What was the difference in the total between Employee separation liability and Restructuring liability?"
  },
  {
    "input": "What was the distinction in the payment amounts expected in Year 1 for interest obligations compared to operating lease obligations?",
    "output": "What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?"
  },
  {
    "input": "What were the contrasting payment amounts between Year 1 for interest obligations and operating lease obligations?",
    "output": "What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?"
  },
  {
    "input": "What is the percentage of total debt obligations in relation to the overall contractual obligations?",
    "output": "What were the total debt obligations as a percentage of the total contractual obligations?"
  },
  {
    "input": "What was the amount of cash generated or used in financing activities during the year 2017?",
    "output": "What was the Net cash provided by (used in) financing activities in 2017?"
  },
  {
    "input": "What was the percentage change in the amount of cash, cash equivalents, and restricted cash from 2018 to 2019?",
    "output": "What was the percentage change in the Net increase in cash, cash equivalents, and restricted cash between 2018 and 2019?"
  },
  {
    "input": "How much of a discount did Customer Advances receive?",
    "output": "What was the amount of discount for Customer Advances?"
  },
  {
    "input": "What was the total amount of discount provided for Customer Advances?",
    "output": "What was the amount of discount for Customer Advances?"
  },
  {
    "input": "What was the net gain or loss from acquisitions in the year 2018?",
    "output": "What was the Acquisition (gain) loss in 2018?"
  },
  {
    "input": "What were the restructuring charges in 2017?",
    "output": "\"What were the specific charges related to restructuring expenses incurred during the year 2017?\""
  },
  {
    "input": "What were the restructuring charges in 2017?",
    "output": "What were the specific charges related to the corporate restructuring that occurred in 2017?"
  },
  {
    "input": "For how many years has the income tax expense from continuing operations exceeded $10,000 thousand?",
    "output": "How many years did Total current income tax expense from continuing operations exceed $10,000 thousand?"
  },
  {
    "input": "What was the difference in foreign income tax expense between 2017 and 2018?",
    "output": "What was the change in current foreign income tax expense between 2017 and 2018?"
  },
  {
    "input": "What was the difference in the amount of foreign income tax expense recorded for the years 2017 and 2018?",
    "output": "What was the change in current foreign income tax expense between 2017 and 2018?"
  },
  {
    "input": "What was the percentage change in the deferred federal income tax expense between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the amount of deferred federal income tax expense from 2018 to 2019?"
  },
  {
    "input": "What were the restructuring charges in 2019?",
    "output": "What were the specific charges incurred as part of the restructuring efforts in the year 2019?"
  },
  {
    "input": "What was the change in Personnel reduction costs between 2017 and 2018?",
    "output": "What was the difference in the costs associated with reducing personnel between the years 2017 and 2018?"
  },
  {
    "input": "What is the percentage change in the costs associated with relocation and exit from 2018 to 2019?",
    "output": "What was the percentage change in the Relocation and exit costs between 2018 and 2019?"
  },
  {
    "input": "What is the percentage difference in the Relocation and exit costs from 2018 to 2019?",
    "output": "What was the percentage change in the Relocation and exit costs between 2018 and 2019?"
  },
  {
    "input": "What was the change in Foreign income between 2017 and 2018?",
    "output": "What was the difference in the amount of income from foreign sources recorded for the years 2017 and 2018?"
  },
  {
    "input": "What were the net sales in 2019?",
    "output": "What was the total value of sales generated in 2019?"
  },
  {
    "input": "What were the net sales in 2019?",
    "output": "What was the total amount of revenue generated from sales in the year 2019?"
  },
  {
    "input": "\"What was the gross margin percentage for the quarter ending on September 30th?\"",
    "output": "What was the gross margin for the Sep-30 quarter?"
  },
  {
    "input": "\"What was the exact gross margin percentage for the quarter ending on September 30th?\"",
    "output": "What was the gross margin for the Sep-30 quarter?"
  },
  {
    "input": "\"For which specific time periods did the Gross Margin exceed $120,000 thousand?\"",
    "output": "Which quarters ended did the Gross Margin exceed $120,000 thousand?"
  },
  {
    "input": "What was the total value of contract assets in 2019?",
    "output": "What were the amount of contract assets in 2019?"
  },
  {
    "input": "What were the specific values of contract assets recorded in the year 2019?",
    "output": "What were the amount of contract assets in 2019?"
  },
  {
    "input": "What was the difference in the amount of prepaid expenses between the fiscal years 2018 and 2019?",
    "output": "What was the change in prepaid expenses between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the amount of prepaid expenses between the years 2018 and 2019?",
    "output": "What was the change in prepaid expenses between 2018 and 2019?"
  },
  {
    "input": "What was the percentage difference in the variance between Software Solutions and Data and Analytics?",
    "output": "What was the difference in the percent variance between Software Solutions and Data and Analytics?"
  },
  {
    "input": "What was the reported amount of net trade receivables in 2017?",
    "output": "What were the net trade receivables as reported in 2017?"
  },
  {
    "input": "What was the adjustmentments for ASC 606 adoption for net computer software?",
    "output": "What adjustments were made for the adoption of ASC 606 in relation to net computer software? Please provide a detailed explanation of the adjustments made for net computer software under ASC 606."
  },
  {
    "input": "What was the adjustmentments for ASC 606 adoption for net computer software?",
    "output": "What were the adjustments made for the adoption of ASC 606 regarding net computer software?"
  },
  {
    "input": "In which specific years did the total value of Deferred contract costs surpass $30 million?",
    "output": "Which years did Deferred contract costs exceed $30 million?"
  },
  {
    "input": "What was the change in cash equivalents between 2018 and 2019?",
    "output": "What was the difference in the amount of cash equivalents reported for the years 2018 and 2019?"
  },
  {
    "input": "What was the change in cash equivalents between 2018 and 2019?",
    "output": "What was the difference in the amount of cash equivalents held between the year 2018 and the year 2019?"
  },
  {
    "input": "What was the change in Cash between 2018 and 2019?",
    "output": "What was the difference in the Cash amount reported in the financial statements of the company between the years 2018 and 2019?"
  },
  {
    "input": "What was the amount of Equity method investments in 2019?",
    "output": "How much money was invested through the equity method in 2019?"
  },
  {
    "input": "Which years does the table provide information for the EBITDA by segment?",
    "output": "For which years does the table contain EBITDA information segmented by different categories?"
  },
  {
    "input": "Which years does the table provide information for the EBITDA by segment?",
    "output": "For which specific years does the table provide EBITDA data segmented by different categories?"
  },
  {
    "input": "What was the total value of EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization) for the year 2018?",
    "output": "What was the sum of the EBITDA in 2018?"
  },
  {
    "input": "\"What was the specific monetary balance amount for Software Solutions in the year 2017?\"",
    "output": "What was the balance in 2017 for Software Solutions?"
  },
  {
    "input": "What was the specific balance amount, in USD, for Software Solutions in the year 2017?",
    "output": "What was the balance in 2017 for Software Solutions?"
  },
  {
    "input": "What was the specific amount of federal income tax expense incurred in the fiscal year of 2019?",
    "output": "What was the current federal income tax expense in 2019?"
  },
  {
    "input": "Which years does the table provide information for depreciation and amortization by segment?",
    "output": "For which specific periods does the table offer data regarding depreciation and amortization broken down by segment?"
  },
  {
    "input": "What was the value of the total assets before the implementation of ASC 606 revenue recognition standards?",
    "output": "What were the total assets without the adoption of ASC 606?"
  },
  {
    "input": "What was the sum of all assets before the implementation of ASC 606?",
    "output": "What were the total assets without the adoption of ASC 606?"
  },
  {
    "input": "How did the adoption of ASC 606 affect the difference between Total Assets and Total Liabilities?",
    "output": "What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?"
  },
  {
    "input": "What is the impact of ASC 606 Adoption on the difference between Total Assets and Total Liabilities?",
    "output": "What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?"
  },
  {
    "input": "What was the percentage change in cash flows from operating activities in 2019 compared to 2018?",
    "output": "What was the cash flows provided by operating activities in 2019 as a percentage of the cash flow in 2018?"
  },
  {
    "input": "For how many consecutive years did the Net increase in cash and cash equivalents remain positive?",
    "output": "How many years was the Net  increase in cash and cash equivalents positive?"
  },
  {
    "input": "What was the EBITDA difference specifically recorded between the Software Solutions division and the Data and Analytics division?",
    "output": "What was the difference in the EBITDA between Software Solutions and Data and Analytics?"
  },
  {
    "input": "What was the exact difference in EBITDA (earnings before interest, taxes, depreciation, and amortization) between Software Solutions and Data and Analytics?",
    "output": "What was the difference in the EBITDA between Software Solutions and Data and Analytics?"
  },
  {
    "input": "What specific expenses were included in the category of operating expenses for Corporate and Other?",
    "output": "What did operating expenses for Corporate and Other include?"
  },
  {
    "input": "What was the discrepancy or variance between the overall value of assets and the specific intangible asset of goodwill in the context of data and analytics?",
    "output": "What was the difference between the total assets and goodwill from data and analytics?"
  },
  {
    "input": "What was the difference between Operating expenses and Revenues from Software Solutions?",
    "output": "What differentiates Operating expenses and Revenues from Software Solutions? Please provide a detailed explanation comparing the two."
  },
  {
    "input": "What was the percentage change in the Net earnings margin between 2017 and 2019?",
    "output": "What was the percentage difference in the Net earnings margin from 2017 to 2019?"
  },
  {
    "input": "What was the percentage change in the Net earnings margin between 2017 and 2019?",
    "output": "What was the exact percentage difference in the net earnings margin from 2017 to 2019?"
  },
  {
    "input": "What was the percentage change in revenues between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in revenues from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in revenues between 2018 and 2019?",
    "output": "What was the percentage difference in the amount of money earned in revenue from 2018 to 2019?"
  },
  {
    "input": "What were the specific prepaid expenses incurred during the year 2018?",
    "output": "What were the prepaid expenses in 2018?"
  },
  {
    "input": "What were the specific categories of Other current assets in the financial statements for the year 2019?",
    "output": "What were the Other current assets in 2019?"
  },
  {
    "input": "What was the change in contract assets between 2018 and 2019?",
    "output": "What was the difference in the amount of contract assets from 2018 to 2019?"
  },
  {
    "input": "How many years did prepaid expenses exceed $40.0 million?",
    "output": "For how many years was the amount of prepaid expenses higher than $40.0 million?"
  },
  {
    "input": "How many years did the operating margin exceed 20.0%?",
    "output": "For how many years was the operating margin greater than 20.0%?"
  },
  {
    "input": "What was the amount of Corporate Services in 2018?",
    "output": "What was the specific amount of funds allocated to Corporate Services during the year 2018?"
  },
  {
    "input": "What was the amount of Corporate Services in 2018?",
    "output": "How much did Corporate Services amount to in the year 2018?"
  },
  {
    "input": "What specific factors or elements were taken into consideration when calculating the debt?",
    "output": "What did the calculation for Debt include?"
  },
  {
    "input": "What was the difference between Total Debt and Total Interest on Debt?",
    "output": "What is the distinction between the total amount of debt and the total interest incurred on that debt?"
  },
  {
    "input": "How many contracts with a value exceeding $100 million were made during the period of 2021-2022?",
    "output": "For the period 2021-2022, how many contractual obligations exceeded $100 million?"
  },
  {
    "input": "What is the dollar difference between the actual cost and the estimated cost for software solutions?",
    "output": "What was the dollar variance for software solutions?"
  },
  {
    "input": "What was the specific revenue generated from the field of Data and Analytics during the year 2017?",
    "output": "What was revenue from Data and Analytics in 2017?"
  },
  {
    "input": "For how many consecutive years has revenue from Data and Analytics surpassed $150 million?",
    "output": "How many years did revenue from Data and Analytics exceed $150 million?"
  },
  {
    "input": "What was the average amount of money earned between the years 2017 and 2018?",
    "output": "What was the average total revenue between 2017 and 2018?"
  },
  {
    "input": "What was the average revenue from Corporate and Other between 2017 and 2018?",
    "output": "What was the average revenue generated from the Corporate and Other category during the two-year period encompassing 2017 and 2018?"
  },
  {
    "input": "What was the change in purchased software between 2018 and 2019?",
    "output": "What was the difference in the amount of software that was bought between the years 2018 and 2019?"
  },
  {
    "input": "For how many years has the internally developed software generated revenue exceeding $800 million?",
    "output": "How many years did internally developed software exceed $800 million?"
  },
  {
    "input": "For how long did internally developed software generate revenue exceeding $800 million?",
    "output": "How many years did internally developed software exceed $800 million?"
  },
  {
    "input": "What was the percentage change in net computer software between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in net computer software from 2018 to 2019?"
  },
  {
    "input": "How many years did Percentage of gross lease receivables exceed 2.0%?",
    "output": "For how many years was the percentage of gross lease receivables above 2.0%?"
  },
  {
    "input": "How many years did Percentage of gross lease receivables exceed 2.0%?",
    "output": "For how long has the percentage of gross lease receivables been above 2.0%?"
  },
  {
    "input": "What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?",
    "output": "What is the percentage difference in the allowance for doubtful accounts from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the Allowance for doubtful accounts from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region between 2017 and 2018?",
    "output": "What was the change for revenue from EMEA between 2017 and 2018?"
  },
  {
    "input": "What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region from 2017 to 2018?",
    "output": "What was the change for revenue from EMEA between 2017 and 2018?"
  },
  {
    "input": "Why would actual maturities differ from the contractual maturities?",
    "output": "Why do the actual time periods until maturity sometimes vary from the specified contractual time periods?"
  },
  {
    "input": "What is the distinction in amortized cost between debt investments held for less than 1 year compared to those held for a period ranging from 1 year to 5 years?",
    "output": "What was the difference between the amortized cost of debt investments that were within 1 year and after 1 year through 5 years?"
  },
  {
    "input": "What was the total amount of money allocated to the gradual write-off of acquired intangible assets throughout the year 2018?",
    "output": "What was the total amortization of purchased intangible assets in 2018?"
  },
  {
    "input": "What was the difference in the variance in dollars between Available-for-sale debt investments and net Other gains (losses)?",
    "output": "What was the variance in dollars between the variance in Available-for-sale debt investments and the variance in net Other gains (losses)?"
  },
  {
    "input": "What was the percentage change in the net other gains (losses) between 2017 and 2018?",
    "output": "What was the percent change in the net amount of other gains (losses) from 2017 to 2018?"
  },
  {
    "input": "What were the dividends in 2018?",
    "output": "What were the dividend payments made in the year 2018?"
  },
  {
    "input": "What were the dividends in 2018?",
    "output": "What was the total amount of dividends paid out in the year 2018?"
  },
  {
    "input": "What units are being utilized in the table and what purpose do they serve in the given context?",
    "output": "What are the units used in the table?"
  },
  {
    "input": "What was the difference in the balance at the start of the fiscal year in 2017 compared to 2018?",
    "output": "What was the change in Balance at beginning of fiscal year between 2017 and 2018?"
  },
  {
    "input": "What was the difference in the balance at the start of the fiscal year from 2017 to 2018?",
    "output": "What was the change in Balance at beginning of fiscal year between 2017 and 2018?"
  },
  {
    "input": "How does the company typically recommend analyzing their liquidity and capital resources for future periods?",
    "output": "How does the company believe that their liquidity and capital resources in future periods should be analyzed?"
  },
  {
    "input": "What differentiates operating leases with durations of less than 1 year from those lasting 1 to 3 years?",
    "output": "What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?"
  },
  {
    "input": "How did the characteristics of operating leases differ between those with a duration of less than 1 year and those lasting from 1 to 3 years?",
    "output": "What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?"
  },
  {
    "input": "What percentage of the total contractual obligations is represented by the senior notes?",
    "output": "What was the total senior notes as a percentage of total contractual obligations?"
  },
  {
    "input": "What is the period that has the greatest Operating leases?",
    "output": "What is the time frame during which Operating leases are most prevalent?"
  },
  {
    "input": "How does the company manage and minimize the potential financial loss related to credit risk in relation to derivatives?",
    "output": "How does the company mitigate credit risk associated with derivatives?"
  },
  {
    "input": "Which years does the table provide information for the company's outstanding derivatives?",
    "output": "For which specific years does the table contain information regarding the outstanding derivatives of the company?"
  },
  {
    "input": "Which years does the table provide information for the company's outstanding derivatives?",
    "output": "For which specific years does the table present information about the company's exceptional derivatives?"
  },
  {
    "input": "What was the change in the Net investment hedging instruments between 2018 and 2019?",
    "output": "What was the difference in the amount of net investment hedging instruments between 2018 and 2019?"
  },
  {
    "input": "What was the change in the Net investment hedging instruments between 2018 and 2019?",
    "output": "What was the difference in the amount of Net investment hedging instruments between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in the total amount of outstanding derivatives between 2018 and 2019?",
    "output": "What was the percentage difference in the total value of outstanding derivatives from 2018 to 2019?"
  },
  {
    "input": "How much was the total amount spent on Purchased Considerations specifically for BroadSoft?",
    "output": "What was the amount of Purchased Considerations for BroadSoft?"
  },
  {
    "input": "What were the differences in terms of Purchase Consideration specifically between Springpath and Broadsoft? Please provide additional details and insights to help understand this comparison better.",
    "output": "What was the difference in Purchase Consideration between Springpath and Broadsoft?"
  },
  {
    "input": "What was the specific variance in terms of Purchase Consideration when comparing Springpath and Broadsoft?",
    "output": "What was the difference in Purchase Consideration between Springpath and Broadsoft?"
  },
  {
    "input": "What was the amount of unearned income specifically derived from lease receivables in the year 2019?",
    "output": "What was unearned income from lease receivables in 2019?"
  },
  {
    "input": "\"What was the amount of unearned income generated specifically from lease receivables during the year 2019?\"",
    "output": "What was unearned income from lease receivables in 2019?"
  },
  {
    "input": "What was the difference in the reported total between current and noncurrent financing receivables?",
    "output": "How much greater or lesser was the total reported amount of financing receivables between current and noncurrent categories?"
  },
  {
    "input": "What were the inclusions of foreign exchange transactions and other related activities?",
    "output": "What did foreign exchange and other include?"
  },
  {
    "input": "What was the difference, expressed as a percentage, in the balance of financing receivables at the end of fiscal year 2018 compared to the balance at the end of fiscal year 2019?",
    "output": "What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?"
  },
  {
    "input": "What percentage increase or decrease occurred in the balance of financing receivables at the end of the fiscal year from 2018 to 2019?",
    "output": "What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?"
  },
  {
    "input": "How many years did Revenue exceed $50,000 million?",
    "output": "How many consecutive years has the revenue exceeded $50,000 million?"
  },
  {
    "input": "How many years did Revenue exceed $50,000 million?",
    "output": "For how many complete years has the revenue consistently surpassed the $50,000 million mark?"
  },
  {
    "input": "What is the difference in the amount of Goodwill between Duo and Luxtera and how does it impact their overall value?",
    "output": "What was the difference in Goodwill between Duo and Luxtera?"
  },
  {
    "input": "What were the variations in the level of Goodwill between Duo and Luxtera?",
    "output": "What was the difference in Goodwill between Duo and Luxtera?"
  },
  {
    "input": "What was the discrepancy in the value of purchased intangible assets between Luxtera and other companies?",
    "output": "What was the difference in Purchased intangible assets between Luxtera and Others?"
  },
  {
    "input": "What were the variations in the value of purchased intangible assets specifically between Luxtera and other companies/entities?",
    "output": "What was the difference in Purchased intangible assets between Luxtera and Others?"
  },
  {
    "input": "What is the acquisition with the highest Purchase Consideration?",
    "output": "What is the highest purchase consideration acquisition?"
  },
  {
    "input": "What was the total value of technology products purchased before accounting for any deductions or expenses?",
    "output": "What was the amount of gross purchased technology?"
  },
  {
    "input": "What is the net value difference between Technology and Customer relationships?",
    "output": "What was the difference in the net values between Technology and Customer relationships?"
  },
  {
    "input": "What was the numerical difference in the net values specifically attributed to Technology and Customer relationships in relation to a certain context?",
    "output": "What was the difference in the net values between Technology and Customer relationships?"
  },
  {
    "input": "Which years does the table provide information for the company's activity related to their product warranty liability?",
    "output": "For which specific years does the table display data regarding the company's product warranty liability?"
  },
  {
    "input": "Which years does the table provide information for the company's activity related to their product warranty liability?",
    "output": "For which years does the table displayed provide information about the company's activity in terms of their product warranty liability?"
  },
  {
    "input": "What were the Acquisitions and divestitures in 2019?",
    "output": "What were the acquisitions and divestitures that took place in the year 2019?"
  },
  {
    "input": "What was the difference in the amount allocated for warranty provisions issued in 2017 compared to 2018?",
    "output": "What was the change in Provisions for warranties issued between 2017 and 2018?"
  },
  {
    "input": "What was the difference in the amount allocated for warranties issued between the years 2017 and 2018?",
    "output": "What was the change in Provisions for warranties issued between 2017 and 2018?"
  },
  {
    "input": "For which specific years does the table offer data on cash and cash equivalents as well as investments?",
    "output": "Which years does the table provide information for  cash and cash equivalents and investments?"
  },
  {
    "input": "What was the change in the value of available-for-sale debt investments, indicating whether there was an increase or decrease?",
    "output": "What was the increase (decrease) in available-for-sale debt investments?"
  },
  {
    "input": "In 2019, how did the distinction between cash and cash equivalents and available-for-sale debt investments manifest and how were they different from each other?",
    "output": "What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?"
  },
  {
    "input": "What differentiates cash and cash equivalents from available-for-sale debt investments in the financial records for the year 2019?",
    "output": "What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?"
  },
  {
    "input": "What was the change in Interest expense between 2017 and 2018?",
    "output": "What is the difference in the amount of interest expense incurred from 2017 to 2018?"
  },
  {
    "input": "What was the change in Interest expense between 2017 and 2018?",
    "output": "What was the difference in the amount of money paid for interest in 2017 compared to 2018?"
  },
  {
    "input": "What was the difference in the amount of income earned from interest between the years 2018 and 2019?",
    "output": "What was the change in Interest income between 2018 and 2019?"
  },
  {
    "input": "What is the amount of federal net operating loss carryforwards that the company had for income tax purposes in the year 2019?",
    "output": "What was the company's federal net operating loss carryforwards for income tax purposes in 2019?"
  },
  {
    "input": "What was the amount of the company's federal net operating loss carryforwards specifically for income tax purposes in the year 2019?",
    "output": "What was the company's federal net operating loss carryforwards for income tax purposes in 2019?"
  },
  {
    "input": "What was the percentage change in total deferred tax assets between 2018 and 2019?",
    "output": "What is the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in total deferred tax assets between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?"
  },
  {
    "input": "In fiscal year 2019, what was the total amount of net interest expense recognized by the company?",
    "output": "How much net interest expense did the company recognize in fiscal year 2019?"
  },
  {
    "input": "How much interest expense, after deducting interest income, did the company record as a net amount during fiscal year 2019?",
    "output": "How much net interest expense did the company recognize in fiscal year 2019?"
  },
  {
    "input": "What were the Additions for tax positions of prior years in 2019?",
    "output": "What were the additions made to the tax positions of previous years in the year 2019?"
  },
  {
    "input": "What was the percentage change in net income between 2018 and 2019?",
    "output": "What was the percentage difference in net income from 2018 to 2019 and how can it be calculated?"
  },
  {
    "input": "Why has the direct effect of foreign currency fluctuations on revenue not been material?",
    "output": "Why has the direct impact of changes in foreign currency exchange rates on our revenue not had a significant financial effect so far? Please provide an explanation for the lack of material impact despite fluctuations in currency values."
  },
  {
    "input": "Why has the direct effect of foreign currency fluctuations on revenue not been material?",
    "output": "Why has the impact of foreign currency fluctuations on revenue not been significant enough to have a material effect?"
  },
  {
    "input": "What was the difference in the fair value of forward contracts that were sold between the years 2018 and 2019?",
    "output": "What was the change in the fair value of sold forward contracts between 2018 and 2019?"
  },
  {
    "input": "Why does the company prioritize inventory and supply chain management as a specific area of focus?",
    "output": "Why is inventory and supply chain management an area of focus for the company?"
  },
  {
    "input": "Why does the company prioritize inventory and supply chain management as a focal point for its operations?",
    "output": "Why is inventory and supply chain management an area of focus for the company?"
  },
  {
    "input": "What was the total sum of purchase commitments made in the year 2018?",
    "output": "What were the total purchase commitments in 2018?"
  },
  {
    "input": "What was the change in purchase commitments that were less than 1 year between 2018 and 2019?",
    "output": "What was the difference in purchase commitments of less than 1 year between 2018 and 2019?"
  },
  {
    "input": "What was the change in purchase commitments that were less than 1 year between 2018 and 2019?",
    "output": "What was the difference in purchase commitments shorter than one year between the year 2018 and the year 2019?"
  },
  {
    "input": "For how long did contracts ranging from 1 to 3 years surpass a cumulative worth of $700 million?",
    "output": "How many years did commitments that were 1 to 3 years exceed $700 million?"
  },
  {
    "input": "What was the percentage change in product revenue for the APJC region from 2017 to 2018?",
    "output": "What was the change in the product revenue from APJC between 2017 and 2018?"
  },
  {
    "input": "What was the change in the product revenue from Americas between 2017 and 2018?",
    "output": "What was the difference in the amount of revenue generated from products in the Americas region between the years 2017 and 2018?"
  },
  {
    "input": "What was the numerical difference in the total value of net property and equipment in International regions between the years 2017 and 2018?",
    "output": "What was the change in net property and equipment from International regions between 2017 and 2018?"
  },
  {
    "input": "What is the percentage increase or decrease in the total net property and equipment from 2018 to 2019?",
    "output": "What was the percentage change in the total net property and equipment between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in revenue generated from security services between 2017 and 2018?",
    "output": "What was the change in revenue from security between 2017 and 2018?"
  },
  {
    "input": "What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?",
    "output": "What was the total payment for the shares from April 28, 2019 to May 25, 2019 and from May 26, 2019 to June 22, 2019?"
  },
  {
    "input": "What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?",
    "output": "What was the overall payment for the shares between April 28, 2019 and May 25, 2019, as well as between May 26, 2019 and June 22, 2019?"
  },
  {
    "input": "What was the average total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?",
    "output": "What was the average total payment for shares from April 28, 2019 to June 22, 2019, and how does it compare between the periods April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?"
  },
  {
    "input": "What was the difference in the balance of Americas between the years 2018 and 2019?",
    "output": "What was the change in balance from Americas between 2018 and 2019?"
  },
  {
    "input": "What was the difference in America's balance between 2018 and 2019 and how did it change?",
    "output": "What was the change in balance from Americas between 2018 and 2019?"
  },
  {
    "input": "Which years does the table provide information for the company's available-for-sale debt investments and equity investments?",
    "output": "For which time period does the table provide data on the available-for-sale debt investments and equity investments of the company?"
  },
  {
    "input": "\"What were the total provisions, also known as benefits, provided in this context?",
    "output": "What were the total provisions (benefits)?"
  },
  {
    "input": "What was the difference between total provisions (benefits) and net recoveries (write-offs)?",
    "output": "What differentiates total provisions, also referred to as benefits, from net recoveries, which are write-offs?"
  },
  {
    "input": "What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?",
    "output": "In 2018, what was the proportion or ratio of the allowance for credit loss specifically assigned to financed service contracts compared to the total allowance for credit loss?"
  },
  {
    "input": "What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?",
    "output": "What was the percentage of the total allowance for credit loss in 2018 that was allocated specifically for financed service contracts?"
  },
  {
    "input": "What is the percentage difference in Loss from continuing operations before income tax between the years 2018 and 2019?",
    "output": "What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?"
  },
  {
    "input": "What is the percentage difference in the Loss from continuing operations before income tax between the years 2018 and 2019?",
    "output": "What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in the income tax benefit from 2018 to 2019?",
    "output": "What is the percentage difference in the income tax benefit received between 2018 and 2019?"
  },
  {
    "input": "In which year did the net loss for the year exceed the net loss of any other year?",
    "output": "In which year is there a greater net loss for the year?"
  },
  {
    "input": "What does the net deferred tax liabilities include?",
    "output": "What is included in the calculation of net deferred tax liabilities?"
  },
  {
    "input": "What is the exact percentage change in the net deferred tax liabilities from 2018 to 2019?",
    "output": "What is the percentage change in the net deferred tax liabilities from 2018 to 2019?"
  },
  {
    "input": "What was the parent entity's current ratio in 2019?",
    "output": "What is the current ratio of the parent entity in 2019?"
  },
  {
    "input": "What was the parent entity's current ratio in 2019? Please provide the numerical value.",
    "output": "What is the current ratio of the parent entity in 2019?"
  },
  {
    "input": "What was the debts to assets ratio of the parent entity in the year 2018? Can you provide the specific ratio value or percentage?",
    "output": "What is the debts to assets ratio of the parent entity in 2018?"
  },
  {
    "input": "What is the debt-to-assets ratio of the parent company for the year 2018?",
    "output": "What is the debts to assets ratio of the parent entity in 2018?"
  },
  {
    "input": "What is the percentage change in the net cash generated or used for investing activities?",
    "output": "What is the percentage change in the net cash provided from investing activities?"
  },
  {
    "input": "What is the change in the net cash provided from operating activities from 2018 to 2019?",
    "output": "What is the difference between the net cash provided from operating activities in 2018 and 2019?"
  },
  {
    "input": "What is the change in the net cash provided from operating activities from 2018 to 2019?",
    "output": "What is the difference in the amount of net cash provided from operating activities between 2018 and 2019?"
  },
  {
    "input": "What is the change in Net change in cash and cash equivalent from 2018 to 2019?",
    "output": "What is the difference in the amount of cash and cash equivalents between 2018 and 2019?"
  },
  {
    "input": "What is the change in Net change in cash and cash equivalent from 2018 to 2019?",
    "output": "What is the difference in the net change in cash and cash equivalents between the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the value of current assets from 2018 to 2019?",
    "output": "What is the percentage change in current assets from 2018 to 2019?"
  },
  {
    "input": "What is the current ratio in the year 2019 and what factors should be considered while determining it?",
    "output": "What is the current ratio in 2019?"
  },
  {
    "input": "What was the debts to assets ratio for the year 2019?",
    "output": "What is the debts to assets ratio in 2019?"
  },
  {
    "input": "\"What was the debts to assets ratio for the year 2019?\"",
    "output": "What is the debts to assets ratio in 2019?"
  },
  {
    "input": "What is the purpose and significance of trial commissions, and how are they defined?",
    "output": "What are trial commissions?"
  },
  {
    "input": "\"What is the purpose and definition of trial commissions?\"",
    "output": "What are trial commissions?"
  },
  {
    "input": "\"How does the Group calculate or determine its total income or financial proceeds?\"",
    "output": "How does the Group determine its revenue?"
  },
  {
    "input": "\"How does the Group calculate or establish its revenue? Provide details about the methods or criteria used for determining the Group's revenue.\"",
    "output": "How does the Group determine its revenue?"
  },
  {
    "input": "What is the percentage difference in the amount of upfront fees charged from 2018 to 2019?",
    "output": "What is the percentage change in upfront fees from 2018 to 2019?"
  },
  {
    "input": "What is the percentage difference in upfront fees between 2018 and 2019?",
    "output": "What is the percentage change in upfront fees from 2018 to 2019?"
  },
  {
    "input": "What is the change in the Business combination reserve between 2018 and 2019?",
    "output": "What is the monetary difference in the Business combination reserve from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in the total reserves from 2018 to 2019?",
    "output": "By what percentage did the total reserves increase or decrease from 2018 to 2019?"
  },
  {
    "input": "What is the percentage increase or decrease in the trail commission asset comparing the values from 2018 and 2019?",
    "output": "What is the percentage change in the current trail commission asset from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in the cash receipts from 2018 to 2019?",
    "output": "What is the percentage increase or decrease in the cash receipts comparing the amounts in 2018 and 2019?"
  },
  {
    "input": "What are the payroll tax rates for both 2019 and 2018?",
    "output": "What is the payroll tax for 2019 and 2018 respectively?"
  },
  {
    "input": "\"What was the amount of post-tax loss incurred from discontinued operations in the year 2019?\"",
    "output": "What is the Post-tax loss of  discontinued operations in 2019?"
  },
  {
    "input": "\"In which year did the revenue increase compared to other years?\"",
    "output": "In which year is there a higher revenue?"
  },
  {
    "input": "In which specific year did the revenue increase, resulting in a higher amount compared to previous years?",
    "output": "In which year is there a higher revenue?"
  },
  {
    "input": "What is the percentage difference in the amount of lease liabilities between 2018 and 2019?",
    "output": "What is the percentage change in the current lease liabilities from 2018 to 2019?"
  },
  {
    "input": "What is the exact percentage difference in the lease liabilities as of 2018 compared to 2019?",
    "output": "What is the percentage change in the current lease liabilities from 2018 to 2019?"
  },
  {
    "input": "What method does the Company use to determine and calculate the amount for its provision?",
    "output": "How does the Company calculate its provision?"
  },
  {
    "input": "How does the Company determine the amount of money set aside for provisions and what factors are taken into consideration during the calculation process?",
    "output": "How does the Company calculate its provision?"
  },
  {
    "input": "What is the percentage change in the contract assets from 2018 to 2019?",
    "output": "What is the percentage change in the value of the contract assets from the year 2018 to the year 2019?"
  },
  {
    "input": "In which year does the number of outstanding shares have a higher value at the start of the period as compared to other years?",
    "output": "In which year is the number of outstanding shares at the beginning of the period higher?"
  },
  {
    "input": "What is the pre-tax discount rate that was utilized to determine the value of projected cash flows for the year 2019?",
    "output": "What is the pre-tax discount rate applied to cash flow projections in 2019?"
  },
  {
    "input": "What was the pre-tax discount rate used to calculate cash flow projections in 2019?",
    "output": "What is the pre-tax discount rate applied to cash flow projections in 2019?"
  },
  {
    "input": "In which year is the health CGU higher?",
    "output": "\"In which specific year does the health Comprehensive Grand Unit (CGU) exhibit a higher value compared to other years?\""
  },
  {
    "input": "In which year is the car CGU higher?",
    "output": "\"In which specific year does the car CGU (Car Gross Utilization) exhibit a higher value compared to other years? Please provide the year.\""
  },
  {
    "input": "What is the post-employment benefits in 2018?",
    "output": "What were the post-employment benefits in 2018, and can you provide more details about them?"
  },
  {
    "input": "What were the share-based payment transactions and their impact in 2019?",
    "output": "What is the share-based payments in 2019?"
  },
  {
    "input": "What were the share-based payments in 2019 and could you provide further details on this topic?",
    "output": "What is the share-based payments in 2019?"
  },
  {
    "input": "What is the percentage change in the post-employment benefits from 2018 to 2019?",
    "output": "What is the percentage difference in post-employment benefits between the years 2018 and 2019?"
  },
  {
    "input": "What specific expenses or expenditure categories are included in the unallocated corporate costs for the current year?",
    "output": "What do the unalloacated corporate costs include in the current year?"
  },
  {
    "input": "What is the exact percentage change in revenue in Australia from the year 2018 to the year 2019?",
    "output": "What is the percentage change in the revenue in Australia from 2018 to 2019?"
  },
  {
    "input": "What is the revenue percentage change experienced in Asia between 2018 and 2019?",
    "output": "What is the percentage change in the revenue in Asia from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in revenue in the Asian region from 2018 to 2019?",
    "output": "What is the percentage change in the revenue in Asia from 2018 to 2019?"
  },
  {
    "input": "What was the total number of outstanding stock options for VMware in 2017?",
    "output": "What was the outstanding number of shares for VMware stock options in 2017?"
  },
  {
    "input": "What was the total number of shares allotted for VMware stock options in the year 2017?",
    "output": "What was the outstanding number of shares for VMware stock options in 2017?"
  },
  {
    "input": "What was the number of granted shares from Pivotal Stock Options in 2018?",
    "output": "\"How many shares were granted by Pivotal Stock Options during the year 2018?\""
  },
  {
    "input": "For how long did the Weighted-Average Exercise Price (per share) for VMware stock options remain above $60.00?",
    "output": "How many years did the outstanding Weighted-Average Exercise Price (per share) for VMware stock options exceed $60.00?"
  },
  {
    "input": "What was the difference in the overall amount of money earned by the company in 2018 compared to 2019?",
    "output": "What was the change in total revenue between 2018 and 2019?"
  },
  {
    "input": "What was the difference in total revenue between the years 2018 and 2019?",
    "output": "What was the change in total revenue between 2018 and 2019?"
  },
  {
    "input": "How many years did total revenue exceed $500 million?",
    "output": "For how many consecutive years has the total revenue been greater than $500 million?"
  },
  {
    "input": "What was the percentage change in net income between 2019 and 2020?",
    "output": "What was the percentage difference in net income from 2019 to 2020, and how can we calculate it accurately?"
  },
  {
    "input": "\"What was the variation in the increase in goodwill associated with mergers and acquisitions from 2019 to 2020?\"",
    "output": "What was the change in the Increase in goodwill related to business combinations between 2019 and 2020?"
  },
  {
    "input": "What was the difference in balance at the start of 2019 and 2020?",
    "output": "What was the change in balance at the beginning of the year between 2019 and 2020?"
  },
  {
    "input": "What was the difference in balance at the beginning of 2019 and 2020?",
    "output": "What was the change in balance at the beginning of the year between 2019 and 2020?"
  },
  {
    "input": "What did the Senior Notes consists of?",
    "output": "What were the contents or components of the Senior Notes?"
  },
  {
    "input": "What was the difference between total Senior Notes and total Notes payable to Dell?",
    "output": "What is the distinction between the combined value of Senior Notes and the combined value of Notes payable to Dell?"
  },
  {
    "input": "What differentiates the total amount of Term Loans from the total amount of Future Lease Commitments?",
    "output": "What was the difference between total Term Loans and total Future Lease Commitments?"
  },
  {
    "input": "What differentiates the total amount of Term Loans from the total value of Future Lease Commitments? Elaborate on the distinctions between these two financial concepts.",
    "output": "What was the difference between total Term Loans and total Future Lease Commitments?"
  },
  {
    "input": "Which specific time periods are covered in the table containing data on long-lived assets categorized by geographic area? These assets primarily consist of net property and equipment.",
    "output": "Which years does the table include information for long-lived assets by geographic area, which primarily include property and equipment, net?"
  },
  {
    "input": "How long has the outstanding balance owed to related parties been surpassing $100 million?",
    "output": "How many years did current amounts due to related parties exceed $100 million?"
  },
  {
    "input": "What was the cumulative amount of sales invoices acknowledged and recorded as revenue throughout the entire fiscal year of 2020?",
    "output": "What was the total billings recognized during the year ended 2020?"
  },
  {
    "input": "\"What was the exact amount of billings that were recorded and acknowledged as revenue throughout the entirety of the year that concluded in 2020?\"",
    "output": "What was the total billings recognized during the year ended 2020?"
  },
  {
    "input": "What was the change in Unearned software maintenance revenue between 2019 and 2020?",
    "output": "\"What was the difference in Unearned software maintenance revenue from 2019 to 2020 and how did it change?\""
  },
  {
    "input": "What was the change in Unearned software maintenance revenue between 2019 and 2020?",
    "output": "What was the difference in Unearned software maintenance revenue from 2019 to 2020?"
  },
  {
    "input": "What is the percentage of unearned revenue in the total revenue for the years 2019 and 2020?",
    "output": "What was the percentage of total unearned revenue between 2019 and 2020?"
  },
  {
    "input": "\"What was the percentage of unearned revenue as a proportion of the total revenue generated in the years 2019 and 2020?\"",
    "output": "What was the percentage of total unearned revenue between 2019 and 2020?"
  },
  {
    "input": "What was the average price per share in 2018, taking into account the weight assigned to each share?",
    "output": "What was the Weighted-average price per share in 2018?"
  },
  {
    "input": "For how many years did the repurchase of Class A common stock surpass a cumulative value of $10,000 million?",
    "output": "How many years did Class A common stock repurchased exceed $10,000 million?"
  },
  {
    "input": "For how many years did the repurchase of Class A common stock amount to more than $10 billion?",
    "output": "How many years did Class A common stock repurchased exceed $10,000 million?"
  },
  {
    "input": "For which years does the table provide details regarding the exclusion of weighted-average common share equivalents of Class A common stock from the calculations of diluted net income per share?",
    "output": "Which years does the table provide information for the weighted-average common share equivalents of Class A common stock that were excluded from the diluted net income per share calculations?"
  },
  {
    "input": "What were the employee stock options for the year 2019?",
    "output": "What was the Employee stock options in 2019?"
  },
  {
    "input": "What were the employee stock options offered in 2019?",
    "output": "What was the Employee stock options in 2019?"
  },
  {
    "input": "How does Dell engage in the procurement of products and services from other companies?",
    "output": "How does Dell purchase products and services from the company?"
  },
  {
    "input": "What was the percentage change in the internal-use revenue between 2019 and 2020?",
    "output": "What is the percentage difference in internal-use revenue from 2019 to 2020?"
  },
  {
    "input": "Which years does the table provide information for accrued expenses and other?",
    "output": "For which specific years does the provided table offer information related to accrued expenses and other similar items?"
  },
  {
    "input": "Which years does the table provide information for accrued expenses and other?",
    "output": "For which specific years does the table contain data regarding accrued expenses and other items?"
  },
  {
    "input": "What was the difference in the total amount of expenses that accumulated over time between the years 2019 and 2020?",
    "output": "What was the change in the total accrued expenses between 2019 and 2020?"
  },
  {
    "input": "What is the difference in the total amount of expenses that accumulated over time between 2019 and 2020?",
    "output": "What was the change in the total accrued expenses between 2019 and 2020?"
  },
  {
    "input": "In 2019, what were the specific support and administrative costs incurred by Dell's subsidiary?",
    "output": "What was the Dell subsidiary support and administrative costs in 2019?"
  },
  {
    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
    "output": "What was the difference in the amount spent on purchasing and leasing products as well as purchasing services from 2018 to 2019?"
  },
  {
    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
    "output": "What was the difference in the amount spent on purchasing and leasing products and purchasing services from 2018 to 2019?"
  },
  {
    "input": "For how many consecutive years did Dell subsidiary incur support and administrative costs exceeding $200 million?",
    "output": "How many years did Dell subsidiary support and administrative costs exceed $200 million?"
  },
  {
    "input": "What was the percentage change in the Dell subsidiary support and administrative costs between 2019 and 2020?",
    "output": "What percentage increase or decrease was observed in the support and administrative costs of Dell's subsidiary from 2019 to 2020?"
  },
  {
    "input": "What was the significance or meaning of the aggregate intrinsic value?",
    "output": "What did the aggregate intrinsic value represent?"
  },
  {
    "input": "What differentiates outstanding options that are both exercisable and vested as well as expected to vest?",
    "output": "What was the difference between outstanding options that were exercisable and vested and expected to vest?"
  },
  {
    "input": "What was the specific amount allocated for the federal income tax provision in the year 2019?",
    "output": "What was the current federal income tax provision in 2019?"
  },
  {
    "input": "What was the specific provision for federal income tax in 2019?",
    "output": "What was the current federal income tax provision in 2019?"
  },
  {
    "input": "What was the change in the current federal income tax provision between 2018 and 2019?",
    "output": "What was the difference in the amount allocated for federal income tax in 2018 compared to 2019?"
  },
  {
    "input": "What was the percentage change in the Total income tax provision between 2018 and 2019?",
    "output": "What was the precise percentage increase or decrease in the Total income tax provision from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the Total income tax provision between 2018 and 2019?",
    "output": "What is the percentage difference in the Total income tax provision from 2018 to 2019?"
  },
  {
    "input": "What was the specific monetary value of Restricted cash categorized under other current assets in the financial records for the year 2019?",
    "output": "What was the amount of Restricted cash within other current assets in 2019?"
  },
  {
    "input": "What were the specific additions made to tax positions related to the current year of 2020 in the field of taxation?",
    "output": "What were the additions to tax positions related to current year in 2020?"
  },
  {
    "input": "What categories are encompassed within the scope of purchases and leases of products, and what does this also include in terms of purchases?",
    "output": "What did Purchases and leases of products and purchases of services include?"
  },
  {
    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
    "output": "What was the year-on-year difference in the amounts spent on purchasing and leasing products and purchasing services between 2018 and 2019?"
  },
  {
    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
    "output": "What was the difference in the amount spent on purchasing and leasing products, as well as purchasing services, from 2018 to 2019?"
  },
  {
    "input": "For how long did the subsidiary of Dell exceed $150 million in annual support and administrative costs?",
    "output": "How many years did Dell subsidiary support and administrative costs exceed $150 million?"
  },
  {
    "input": "What specific expenses or liabilities are encompassed within the current balances owed to affiliated individuals or entities?",
    "output": "What did the current amounts due to related parties include?"
  },
  {
    "input": "What specific items or transactions are included in the present outstanding balances owed to related parties?",
    "output": "What did the current amounts due to related parties include?"
  },
  {
    "input": "For how long has the current outstanding amount owed by related parties been over $1,000 million?",
    "output": "How many years did current amount due from related parties exceed $1,000 million?"
  },
  {
    "input": "For how long has the present outstanding balance owed by affiliated organizations been over $1,000 million?",
    "output": "How many years did current amount due from related parties exceed $1,000 million?"
  },
  {
    "input": "What was the percentage increase or decrease in the net current amount due from related parties from 2019 to 2020?",
    "output": "What was the percentage change in the net current amount due from related parities between 2019 and 2020?"
  },
  {
    "input": "What was the change in Additions to intangible assets between 2019 and 2020?",
    "output": "What was the difference in the amount of intangible assets added between the years 2019 and 2020?"
  },
  {
    "input": "What is the total amount of lease liabilities for operating leases recorded on the current financial statements?",
    "output": "What were the current lease liabilities for operating leases?"
  },
  {
    "input": "What is the total amount of lease liabilities for operating leases as of the present moment?",
    "output": "What were the current lease liabilities for operating leases?"
  },
  {
    "input": "How many lease liabilities for operating leases exceeded $500 million?",
    "output": "How many operating lease liabilities were there that exceeded $500 million in amount?"
  },
  {
    "input": "What differentiates current lease liabilities from non-current lease liabilities in the context of operating leases?",
    "output": "What was the difference between current and non-current lease liabilities for operating leases?"
  },
  {
    "input": "What differentiates current lease liabilities from non-current lease liabilities for operating leases?",
    "output": "What was the difference between current and non-current lease liabilities for operating leases?"
  },
  {
    "input": "What was the discrepancy in the aggregate lease obligations between operating leases and finance leases?",
    "output": "What was the difference in total lease liabilities between operating leases and finance leases?"
  },
  {
    "input": "Which years does the table provide information for unearned revenue?",
    "output": "For which specific years does the table contain information regarding unearned revenue?"
  },
  {
    "input": "Which years does the table provide information for unearned revenue?",
    "output": "\"For which specific years does the table provide data on unearned revenue?\""
  },
  {
    "input": "What was the percentage change in Unearned subscription and SaaS revenue from 2019 to 2020?",
    "output": "What was the change in Unearned subscription and SaaS revenue between 2019 and 2020?"
  },
  {
    "input": "What was the foreign income in 2018?",
    "output": "What was the total amount of income received from foreign sources during the year 2018?"
  },
  {
    "input": "What is the percentage difference in the total income prior to income tax between the years 2019 and 2020?",
    "output": "What was the percentage change in the total income before income tax between 2019 and 2020?"
  },
  {
    "input": "What was the total value of construction projects underway in 2020 globally or within a specific country/region?",
    "output": "What was the amount of construction in progress in 2020?"
  },
  {
    "input": "What was the difference in the total value of property and equipment from 2019 to 2020?",
    "output": "What was the change in Total property and equipment between 2019 and 2020?"
  },
  {
    "input": "What was the change in the Unrealized losses on available-for-sale securities between 2017 and 2018?",
    "output": "What was the difference in the amount of Unrealized losses on available-for-sale securities from 2017 to 2018?"
  },
  {
    "input": "What was the percentage change in the Reclassification of realized transactions, net of taxes between 2018 and 2019?",
    "output": "What percentage represents the difference in the Reclassification of realized transactions, net of taxes between the years 2018 and 2019?"
  },
  {
    "input": "What impact did the alteration in the balance of net accounts receivable have on the financial situation?",
    "output": "What was the effect of change in the net accounts receivable?"
  },
  {
    "input": "What is the ratio of long-term deferred tax liability to accrued liabilities?",
    "output": "What was the Long-term deferred tax liability as a ratio of Accrued liabilities?"
  },
  {
    "input": "How did the Other interest expense change between the years 2017 and 2018?",
    "output": "What was the change in Other interest expense between 2017 and 2018?"
  },
  {
    "input": "What was the difference between the reported net sales and the reported cost of sales in terms of amount?",
    "output": "What was the difference in amount as reported between net sales and cost of sales?"
  },
  {
    "input": "What was the difference between Net income from continuing operations and Income before income taxes?",
    "output": "What is the distinction between net income from continuing operations and income before income taxes? Please provide a comprehensive response, retaining all the details provided in the original question. Keep the question brief, succinct, and in the same language as the original."
  },
  {
    "input": "What was the impact of stock options and RSUs on dilution in 2019?",
    "output": "What was the Dilutive effect of stock options and RSUs in 2019?"
  },
  {
    "input": "How many consecutive years did the balance at the end of each year surpass $2 million?",
    "output": "How many years did the balance at end of the year exceed $2 million?"
  },
  {
    "input": "For how many years was the balance at the end of each year above $2 million?",
    "output": "How many years did the balance at end of the year exceed $2 million?"
  },
  {
    "input": "How many years did Intercompany prepaid tax asset amortization exceed $7 million?",
    "output": "For how many years was the amortization of Intercompany prepaid tax asset greater than $7 million?"
  },
  {
    "input": "What was the service cost in 2018?",
    "output": "What was the cost of the service provided in 2018?"
  },
  {
    "input": "What was the service cost in 2018?",
    "output": "What was the cost of the service in the year 2018 specifically?"
  },
  {
    "input": "What was the total amount of interest incurred in the year 2019?",
    "output": "What was the interest cost in 2019?"
  },
  {
    "input": "What was the total expense incurred as interest in the year 2019?",
    "output": "What was the interest cost in 2019?"
  },
  {
    "input": "What was the percentage difference in the Discount rate from 2018 to 2019?",
    "output": "What was the change in the Discount rate between 2018 and 2019?"
  },
  {
    "input": "What was the specific difference in the Discount rate from the year 2018 to the year 2019?",
    "output": "What was the change in the Discount rate between 2018 and 2019?"
  },
  {
    "input": "Why could the sums fail to equal the overall total?",
    "output": "Why might amounts may not add to the total?"
  },
  {
    "input": "\"What was the numerical distinction between the total amount obtained from sales after deducting all applicable costs, and the total revenue generated from sales before deducting any expenses or costs?\"",
    "output": "What was the difference between the total net sales and gross profit?"
  },
  {
    "input": "What is the calculated percentage change in the operating income from the third quarter to the fourth quarter?",
    "output": "What was the percentage change in the Operating income between the third and fourth quarter?"
  },
  {
    "input": "What was the change in the cost of sales between 2017 and 2018?",
    "output": "How much did the cost of sales change from 2017 to 2018?"
  },
  {
    "input": "What was the percentage change in the Net deferred tax asset between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the amount of Net deferred tax asset from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the Net deferred tax asset between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the Net deferred tax asset from 2018 to 2019?"
  },
  {
    "input": "What is the percentage difference in the total balance from 2018 to 2019? Calculate using the formula for percentage change.",
    "output": "What was the percentage change in the total balance between 2018 and 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the total balance from 2018 to 2019?",
    "output": "What was the percentage change in the total balance between 2018 and 2019?"
  },
  {
    "input": "For how long, in terms of number of years, has the cost of sales exceeded 40% of net sales?",
    "output": "How many years did cost of sales of net sales exceed 40%?"
  },
  {
    "input": "What is the percentage increase or decrease in operating income as a proportion of net sales from 2018 to 2019?",
    "output": "What was the percentage change in operating income of net sales between 2018 and 2019?"
  },
  {
    "input": "What was the exact percentage increase or decrease in operating income of net sales from 2018 to 2019?",
    "output": "What was the percentage change in operating income of net sales between 2018 and 2019?"
  },
  {
    "input": "How many years have there been increases in tax positions related to the current year that exceeded $30 million?",
    "output": "How many years did Increases related to current year tax positions exceed $30 million?"
  },
  {
    "input": "What was the percentage change in the Ending balance between 2018 and 2019?",
    "output": "What was the percentage difference in the Ending balance from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the Ending balance between 2018 and 2019?",
    "output": "What is the percent increase or decrease in the Ending balance from 2018 to 2019?"
  },
  {
    "input": "According to what were the plan benefits provided, and to what were they in accordance with?",
    "output": "What were plan benefits provided in accordance with?"
  },
  {
    "input": "In accordance with what, were the plan benefits provided? Can you expand on the specific benefits mentioned in the plan?",
    "output": "What were plan benefits provided in accordance with?"
  },
  {
    "input": "What were the total service costs incurred during the year 2018?",
    "output": "What were the service costs in 2018?"
  },
  {
    "input": "What were the specific costs associated with services provided in 2018?",
    "output": "What were the service costs in 2018?"
  },
  {
    "input": "What were the total interest expenses incurred in the year 2017?",
    "output": "What were the interest costs in 2017?"
  },
  {
    "input": "What was the total amount spent on interest expenses in the year 2017?",
    "output": "What were the interest costs in 2017?"
  },
  {
    "input": "What was the difference in the amount of Amortization of actuarial loss between 2018 and 2019?",
    "output": "What was the change in the Amortization of actuarial loss between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the amount allocated for the gradual reduction of actuarial loss in the financial statements between the years 2018 and 2019?",
    "output": "What was the change in the Amortization of actuarial loss between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in the net pension period cost between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the net pension period cost from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the net pension period cost between 2018 and 2019?",
    "output": "What was the percentage difference in the net pension period cost from 2018 to 2019?"
  },
  {
    "input": "How was the lower of cost and net realizable value determined as the valuation method for inventories? Please provide a detailed explanation of the process.",
    "output": "How were inventories valued at the lower of cost and net realizable value?"
  },
  {
    "input": "What was the quantity or volume of raw materials used in the year 2018?",
    "output": "What was the amount of raw materials in 2018?"
  },
  {
    "input": "For how long has the value of Finished goods remained above $200 million?",
    "output": "How many years did the amount of Finished goods exceed $200 million?"
  },
  {
    "input": "What was the total value of both Core and developed technology assets combined?",
    "output": "What was the gross amount of Core and developed technology assets?"
  },
  {
    "input": "What was the value assigned to each share granted on a specific date at March 31, 2017, taking into account their respective weights?",
    "output": "What was the Weighted Average Grant Date Fair Value for nonvested shares at March 31, 2017?"
  },
  {
    "input": "What was the percentage increase or decrease in the number of shares that have not yet vested from 2018 to 2019?",
    "output": "What was the percentage change in the number of nonvested shares between 2018 and 2019?"
  },
  {
    "input": "What was the cumulative intrinsic value of options and stock appreciation rights (SARs) exercised throughout the fiscal years ending on March 31, 2019?",
    "output": "What was the total intrinsic value of options and SARs exercised during the years ended March 31, 2019?"
  },
  {
    "input": "In 2019, how many options and SAR (stock appreciation rights) shares were available for exercise?",
    "output": "What was the number of option and SAR shares exercisable in 2019?"
  },
  {
    "input": "Which years does the table provide the number of outstanding shares for?",
    "output": "Which specific years are mentioned in the table that provides the information regarding the number of outstanding shares?"
  },
  {
    "input": "What was the difference in the Weighted Average Exercise Price per Share for outstanding shares between the years 2017 and 2018?",
    "output": "What was the change in the Weighted Average Exercise Price per Share for outstanding shares between 2017 and 2018?"
  },
  {
    "input": "\"What was the calculated percentage difference in the quantity of outstanding shares from 2018 to 2019?\"",
    "output": "What was the percentage change in the number of outstanding shares between 2018 and 2019?"
  },
  {
    "input": "For which specific years does the table provide information on Identifiable long-lived assets (property, plant, and equipment net of accumulated amortization) within different geographic areas?",
    "output": "Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?"
  },
  {
    "input": "Which geographic areas are included in the table that provides information on identifiable long-lived assets, specifically property, plant, and equipment (net of accumulated amortization) for different years?",
    "output": "Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?"
  },
  {
    "input": "What was the total value of assets held in different countries during the year 2019?",
    "output": "What was the amount of assets in Various other countries in 2019?"
  },
  {
    "input": "What was the total value of assets in various countries in 2019?",
    "output": "What was the amount of assets in Various other countries in 2019?"
  },
  {
    "input": "What percentage increase or decrease occurred in the total value of long-lived assets from 2018 to 2019?",
    "output": "What was the percentage change in total long-lived assets between 2018 and 2019?"
  },
  {
    "input": "What was the change in the Deferred expense for State between 2018 and 2019?",
    "output": "What is the difference in the amount of Deferred expense for the State from 2018 to 2019?"
  },
  {
    "input": "What was the change in the Deferred expense for State between 2018 and 2019?",
    "output": "What was the difference in the amount of Deferred expense for State between the years 2018 and 2019?"
  },
  {
    "input": "What is the cumulative balance of all time deposits?",
    "output": "What was the total balance time deposits?"
  },
  {
    "input": "What is the aggregate amount of money held in time deposits at present?",
    "output": "What was the total balance time deposits?"
  },
  {
    "input": "What was the specific balance of Inventories on April 1, 2018, as recorded in the financial records or statements?",
    "output": "What was the balance of Inventories on April 1, 2018?"
  },
  {
    "input": "What was the adjustment made in ASC 606 for other current assets? Please provide details regarding how this adjustment affected the accounting treatment and reporting of other current assets under ASC 606.",
    "output": "What was the adjustment from ASC 606 for other current assets?"
  },
  {
    "input": "How many liabilities had a balance on March 31, 2018 that exceeded $300 million?",
    "output": "How many liabilities, as of March 31, 2018, had a balance surpassing $300 million?"
  },
  {
    "input": "What was the percentage change in Other assets specifically attributed to the adjustments made?",
    "output": "What was the percentage change in Other assets due to the adjustments?"
  },
  {
    "input": "What were the net sales in 2016?",
    "output": "What was the total amount of sales generated in the year 2016?"
  },
  {
    "input": "What was the difference in net sales from 2016 to 2017, and how can this information assist in better answering the question?",
    "output": "What was the change in net sales between 2016 and 2017?"
  },
  {
    "input": "What was the difference in net sales revenue from 2016 to 2017?",
    "output": "What was the change in net sales between 2016 and 2017?"
  },
  {
    "input": "Which years did assets from Thailand exceed $200 million?",
    "output": "Which specific years did the total value of assets originating from Thailand surpass $200 million?"
  },
  {
    "input": "Which years did assets from Thailand exceed $200 million?",
    "output": "In which specific years did the total value of assets originating from Thailand surpass $200 million?"
  },
  {
    "input": "What was the percentage change in the total long-lived assets between 2018 and 2019?",
    "output": "What was the percentage difference in the total value of long-lived assets from 2018 to 2019, indicating whether it increased or decreased?"
  },
  {
    "input": "In which years was the total annual compensation calculated in?",
    "output": "What are the specific years for which the total annual compensation was calculated?"
  },
  {
    "input": "Which geographic locations are taken into account for calculating the overall current tax expense as mentioned in the table?",
    "output": "What are the geographic locations in the table considered when calculating the total current tax expense?"
  },
  {
    "input": "What is the highest recorded tax expense in Germany and in which specific year did it occur?",
    "output": "In which year was the current tax expense in Germany the largest?"
  },
  {
    "input": "What is the highest recorded tax expense in Germany, and in which specific year was it observed?",
    "output": "In which year was the current tax expense in Germany the largest?"
  },
  {
    "input": "What was the amount of non-current assets in APJ in 2019?",
    "output": "What was the specific value of non-current assets within the Asia-Pacific-Japan (APJ) region during the year 2019?"
  },
  {
    "input": "During which specific years were the calculations of Non-Current Assets by Region conducted?",
    "output": "In which years were the Non-Current Assets by Region calculated?"
  },
  {
    "input": "In which year was the amount in Rest of Americas larger?",
    "output": "In what specific year did the Rest of Americas region experience a higher amount compared to previous years?"
  },
  {
    "input": "What was the numerical difference in the quantity of Rest of Americas in 2019 compared to 2018?",
    "output": "What was the change in the amount in Rest of Americas in 2019 from 2018?"
  },
  {
    "input": "What was the difference in the quantity of Rest of Americas in 2019 compared to 2018?",
    "output": "What was the change in the amount in Rest of Americas in 2019 from 2018?"
  },
  {
    "input": "How was the Total expense for the share-based payment plans of Executive Board members determined?",
    "output": "How was the total expense for the share-based payment plans determined specifically for the Executive Board members?"
  },
  {
    "input": "During which specific years is the Total Expense for Share-Based Payment given or provided?",
    "output": "In which years is the Total Expense for Share-Based Payment provided?"
  },
  {
    "input": "For which specific years is the Total Expense for Share-Based Payment given?",
    "output": "In which years is the Total Expense for Share-Based Payment provided?"
  },
  {
    "input": "What is the specific value or quantity for the EMEA region in 2019?",
    "output": "What is the amount for EMEA in 2019?"
  },
  {
    "input": "What is the amount for APJ in 2018?",
    "output": "What was the total amount of APJ in the year 2018?"
  },
  {
    "input": "When did Germany have the highest amount?",
    "output": "In which year was the amount for Germany the largest?"
  },
  {
    "input": "When did Germany experience its highest amount?",
    "output": "In which year was the amount for Germany the largest?"
  },
  {
    "input": "What is the specific monetary figure spent on employee benefits in the year 2019?",
    "output": "What is the amount of employee benefits expenses in 2019?"
  },
  {
    "input": "What was the percentage difference in the amount spent on pensions in 2019 compared to 2018?",
    "output": "What was the percentage change in Pension expenses in 2019 from 2018?"
  },
  {
    "input": "What was the total cost of services offered in the year 2019?",
    "output": "What was the Cost of services in 2019?"
  },
  {
    "input": "What was the specific cost of services rendered in the year 2019?",
    "output": "What was the Cost of services in 2019?"
  },
  {
    "input": "\"When did the Cost of services reach its highest value?\"",
    "output": "In which year was Cost of services largest?"
  },
  {
    "input": "In which specific year did the Cost of services reach its maximum value, considering all available data?",
    "output": "In which year was Cost of services largest?"
  },
  {
    "input": "What was the change in issued capital in 2019 from 2018?",
    "output": "What was the net change in the amount of issued capital between the years 2018 and 2019?"
  },
  {
    "input": "What was the change in issued capital in 2019 from 2018?",
    "output": "What was the difference in the amount of issued capital between 2019 and 2018? How much did the issued capital change from 2018 to 2019?"
  },
  {
    "input": "What is the percentage of unused tax losses in 2019 that can be attributed to state tax loss carryforwards in the United States?",
    "output": "How much of unused tax losses relate to U.S. state tax loss carryforwards in 2019?"
  },
  {
    "input": "In which year was the amount Expiring after the following year the largest?",
    "output": "In what year did the highest amount of expenses expire after the subsequent year?"
  },
  {
    "input": "What is the meaning and scope of the rights associated with Bill McDermott?",
    "output": "What do the rights for Bill McDermott refer to? "
  },
  {
    "input": "What specific types of entitlements are included in the table when referring to annual pension entitlements?",
    "output": "What kind of entitlements are annual pension entitlements as displayed in the table?"
  },
  {
    "input": "Which specific years are the annual pension entitlements fully and legally secured to an individual?",
    "output": "In which years are the annual pension entitlements vested?"
  },
  {
    "input": "What was the profit before tax in 2019?",
    "output": "What was the pre-tax profit for the fiscal year of 2019?"
  },
  {
    "input": "What was the profit before tax in 2019?",
    "output": "What was the pre-tax profit for the year 2019?"
  },
  {
    "input": "\"What was the specific tax rate applicable in 2018 for individuals or businesses in a particular country?\"",
    "output": "What was the applicable tax rate in 2018?"
  },
  {
    "input": "What was the percentage change in Other in 2019 from 2018?",
    "output": "What was the percentage difference in the category labeled \"Other\" between the years 2018 and 2019?"
  },
  {
    "input": "What market is the information specifically targeting and representing for Airtel?",
    "output": "Which market does the information reflect for Airtel?"
  },
  {
    "input": "What is the percentage increase in the number of mobile customers for AIS over a specific period?",
    "output": "What is the % growth of mobile customers for AIS?"
  },
  {
    "input": "Which associate had the best % growth in mobile customers?",
    "output": "\"Which associate experienced the highest percentage growth in the number of mobile customers over a specific period of time?\""
  },
  {
    "input": "What specific components were encompassed within the exceptional items during the fiscal year 2018?",
    "output": "What was included in the exceptional items in FY2018?"
  },
  {
    "input": "What specific items were considered exceptional in the financial year 2018?",
    "output": "What was included in the exceptional items in FY2018?"
  },
  {
    "input": "What was the amount of cash generated from operations after accounting for capital expenditures and operating expenses in the specified year?",
    "output": "What was the free cash flow for the year?"
  },
  {
    "input": "What is the % change in EBITDA margin from 2018 to 2019?",
    "output": "What is the percentage difference in EBITDA margin between 2018 and 2019?"
  },
  {
    "input": "What is the % change in EBITDA margin from 2018 to 2019?",
    "output": "What is the percentage change in EBITDA margin from the year 2018 to the year 2019?"
  },
  {
    "input": "What is the absolute difference in net profit between 2018 and 2019?",
    "output": "What is the change in net profit from 2018 to 2019 in absolute numbers?"
  },
  {
    "input": "What does the line item labeled as 'Others' provide information about in a given context or dataset?",
    "output": "What information does the line item 'Others' relate to?"
  },
  {
    "input": "What is the average total amount owed in debt for the duration of the two-year period?",
    "output": "What is the average gross debt across the 2 years?"
  },
  {
    "input": "Why does this net deferred gain balance exist?",
    "output": "Why is there a net deferred gain balance?"
  },
  {
    "input": "What is the reason for the continued presence of a net deferred gain balance after Singtel completed the sale of its entire 100% stake in NLT to NetLink NBN Trust in July 2017?",
    "output": "Why is there still a balance of net deferred gain since Singtel sold its 100% interest in NLT to NetLink NBN Trust in July 2017?"
  },
  {
    "input": "What was the year when the net deferred gain balance reached its peak?",
    "output": "In which year was the net deferred gain balance the highest?"
  },
  {
    "input": "What is the subject matter or main focus mentioned in note 22?",
    "output": "What is the topic of note 22?"
  },
  {
    "input": "What is included in the \"Others\" category in the table?",
    "output": "What does the line item \"Others\" in the table encompass?"
  },
  {
    "input": "What is the Singtel's largest joint venture in terms of the proportion of the Group's ownership?",
    "output": "Which is the largest joint venture of Singtel, in terms of the proportion of Group's ownership?"
  },
  {
    "input": "Do the non-audit services offered by KPMG LLP have an impact on their independence in conducting audits?",
    "output": "Does the non-audit services provided by KPMG LLP affect their independence?"
  },
  {
    "input": "What is included in the compensation for key management personnel?",
    "output": "What does key management personnel compensation comprise?"
  },
  {
    "input": "What is included in the compensation package for key management personnel?",
    "output": "What does key management personnel compensation comprise?"
  },
  {
    "input": "What are the components included in directors' remuneration?",
    "output": "What does directors' remuneration comprise of?"
  },
  {
    "input": "To what topic does the content of note 18.1 correspond?",
    "output": "What is the subject matter of note 18.1?"
  },
  {
    "input": "What percentage of the company's derivative financial liabilities are classified as non-current liabilities disclosed in their financial statements?",
    "output": "How many % of the company's derivative financial liabilities are being disclosed as non-current?"
  },
  {
    "input": "What is Note 21 about?",
    "output": "What is the topic of note 21?"
  },
  {
    "input": "Can you please provide details on the terms and conditions of the advances made by the shareholders?",
    "output": "What are the terms of the shareholders' advances?"
  },
  {
    "input": "\"What are the specific terms and conditions associated with the advances provided to shareholders?\"",
    "output": "What are the terms of the shareholders' advances?"
  },
  {
    "input": "How many factors need to be considered when calculating the balance for subsidiaries?",
    "output": "How many factors are involved in calculating the balance for subsidiaries?"
  },
  {
    "input": "\"What is typically included in the category of selling and administrative costs?\"",
    "output": "What does selling and administrative costs include?"
  },
  {
    "input": "How many distinct categories of operating expenses exist?",
    "output": "How many different type of operating expenses are there?"
  },
  {
    "input": "What is the mean value of the three highest subcategories within operating expenses for the year 2019?",
    "output": "What is the average of the top 3 operating expenses subcategories in 2019?"
  },
  {
    "input": "What is the average value of the three highest subcategories of operating expenses in the year 2019?",
    "output": "What is the average of the top 3 operating expenses subcategories in 2019?"
  },
  {
    "input": "What are the projected maximum future payouts for Barry Litwin and Thomas Clark under the NEO plan of 2019?",
    "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?"
  },
  {
    "input": "What is the maximum estimated future payout amount under the 2019 NEO plan specifically designated for Barry Litwin and Thomas Clark?",
    "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?"
  },
  {
    "input": "What are the projected highest expected future payouts under the 2019 NEO plan for both Manoj Shetty and Lawrence Reinhold?",
    "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Manoj Shetty and Lawrence Reinhold?"
  },
  {
    "input": "What are the audit fees incurred by the company in 2018 and 2019?",
    "output": "What were the audit fees that the company had to pay in the years 2018 and 2019?"
  },
  {
    "input": "What is the percentage difference in the total amount of fees paid by the company for all other expenses between the years 2018 and 2019?",
    "output": "What is the percentage change in all other fees incurred by the company between 2018 and 2019?"
  },
  {
    "input": "What percentage of common stock does Barry Litwin and Robert D. Rosenthal collectively own?",
    "output": "What is the percent of common stock owned by Barry Litwin and Robert D. Rosenthal?"
  },
  {
    "input": "What is the total compensation received by Robert D. Rosenthal and Chad M. Lindbloom respectively during fiscal 2019?",
    "output": "What were the individual total compensations received by Robert D. Rosenthal and Chad M. Lindbloom in fiscal year 2019?"
  },
  {
    "input": "What is the total compensation received by Paul S. Pearlman and Lawrence Reinhold respectively during fiscal 2019?",
    "output": "What were the individual total compensations earned by Paul S. Pearlman and Lawrence Reinhold in fiscal year 2019?"
  },
  {
    "input": "What is the definition and significance of stock awards?",
    "output": "What does stock awards refer to?"
  },
  {
    "input": "What are the Thomas Clark's accelerated vesting of stock options and unvested performance restricted stock units respectively?",
    "output": "Can you please provide specific details about the accelerated vesting of stock options of Thomas Clark? Additionally, what are the details regarding the unvested performance restricted stock units held by Thomas Clark?"
  },
  {
    "input": "What is the meaning or definition of the \"other\" description in the allowance for sales return in the year 2017?",
    "output": "What does the \"other\" description in allowance for sales return in 2017 refer to?"
  },
  {
    "input": "What is the cumulative amount of allowances for sales returns write-offs from 2017 to 2019?",
    "output": "What is the total allowances for sales returns write-offs between 2017 to 2019?"
  },
  {
    "input": "What is the total amount of allowances accounted for sales returns and write-offs from 2017 to 2019?",
    "output": "What is the total allowances for sales returns write-offs between 2017 to 2019?"
  },
  {
    "input": "What is the percentage difference in the allowance for deferred tax assets at the end of the period in 2018 compared to 2019?",
    "output": "What is the percentage change in the allowance for deferred tax assets at the end of period between 2018 and 2019?"
  },
  {
    "input": "What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?",
    "output": "What is the accelerated vesting schedule for Manoj Shetty's stock options? Additionally, how many unvested performance restricted stock units does he currently have?"
  },
  {
    "input": "What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?",
    "output": "What is the accelerated vesting timeline for Manoj Shetty's stock options? Additionally, what is the status of his unvested performance restricted stock units?"
  },
  {
    "input": "What is the difference in consolidated gross profits comparing 2019 to 2018, and also 2018 to 2017?",
    "output": "What is the change in consolidated gross profits between 2019 vs 2018 and 2018 vs 2017 respectively?"
  },
  {
    "input": "What is the total consolidated net sales in 2019 and 2018?",
    "output": "What were the combined net sales for the fiscal years of 2019 and 2018?"
  },
  {
    "input": "What is the total consolidated net sales in 2017 and 2018",
    "output": "What were the combined net sales for the years 2017 and 2018?"
  },
  {
    "input": "What is the total consolidated net sales in 2017 and 2018",
    "output": "What were the combined net sales for 2017 and 2018?"
  },
  {
    "input": "What is the change in consolidated gross profit between 2017 and 2018?",
    "output": "What is the difference in consolidated gross profit for the company between the years 2017 and 2018, and how does the value change during this period?"
  },
  {
    "input": "What is the change in consolidated gross profit between 2017 and 2018?",
    "output": "What is the specific difference in consolidated gross profit when comparing the financial years of 2017 and 2018?"
  },
  {
    "input": "What was the percentage of the total population or total something specific (specify) from Singapore in the year 2019?",
    "output": "What was the percentage of total from Singapore in 2019?"
  },
  {
    "input": "What was the average amount of money sent from Singapore in the years 2018 and 2019?",
    "output": "What was the average amount from Singapore in 2018 and 2019?"
  },
  {
    "input": "What was the average amount of money, in Singapore currency, that was received from Singapore in the years 2018 and 2019 combined?",
    "output": "What was the average amount from Singapore in 2018 and 2019?"
  },
  {
    "input": "How is the fair value of a financial instrument of a company determined and defined?",
    "output": "How is the fair value of the company's financial instrument defined?"
  },
  {
    "input": "In what specific year did the interest rate swap fall below the threshold of 1,000 thousands?",
    "output": "In which year was interest rate swap less than 1,000 thousands?"
  },
  {
    "input": "In which specific year did the interest rate swap fall below one million in thousands?",
    "output": "In which year was interest rate swap less than 1,000 thousands?"
  },
  {
    "input": "What are the three levels of subjectivity and could you provide more information to assist in providing a comprehensive answer?",
    "output": "What are the 3 levels of subjectivity?"
  },
  {
    "input": "What are the 3 distinct levels of subjectivity and how can they be described or defined?",
    "output": "What are the 3 levels of subjectivity?"
  },
  {
    "input": "\"What were the additional features or updates introduced in the year 2019? Similarly, what were the new additions or improvements made in 2018?\"",
    "output": "What were the other additions in 2019 and 2018 respectively?"
  },
  {
    "input": "\"What are the average number of other additions made in 2018 and 2019?\"",
    "output": "What is the average other additions for 2018 and 2019?"
  },
  {
    "input": "\"What is the average number of non-inclusive additions made in the years 2018 and 2019 combined?\"",
    "output": "What is the average other additions for 2018 and 2019?"
  },
  {
    "input": "In what specific year did the transfer of developed technology from IPR&D exceed an amount greater than 4,000 thousands?",
    "output": "In which year was Transfers to developed technology from IPR&D greater than 4,000 thousands?"
  },
  {
    "input": "What were the average Selling, General, and Administrative expenses for the financial years ending on December 31, 2019 and 2018?",
    "output": "What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?"
  },
  {
    "input": "\"What is the average amount spent on Selling, general and administrative expenses for the Years Ended December 31, 2019 and 2018?\"",
    "output": "What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?"
  },
  {
    "input": "What was the percentage decrease in the Selling, general and administrative expenses during the year 2019?",
    "output": "What was the decrease in Selling, general and administrative in 2019?"
  },
  {
    "input": "What was the number of shares granted in 2019?",
    "output": "How many shares were awarded in the year 2019?"
  },
  {
    "input": "Define research and development activities.",
    "output": "What are research and development activities?"
  },
  {
    "input": "What was the average amount spent on net revenue for the years ending on December 31, 2019 and 2018?",
    "output": "What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?"
  },
  {
    "input": "What is the average Cost of net revenue for the period spanning from December 31, 2018, to December 31, 2019?",
    "output": "What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?"
  },
  {
    "input": "In which year was Restructuring expense 0 thousands?",
    "output": "In which specific year did the Restructuring expense amount to zero thousands (thousands referring to the unit) without any incurred costs?"
  },
  {
    "input": "In which year was Restructuring expense 0 thousands?",
    "output": "What was the specific year when the Restructuring expense amounted to 0 thousands?"
  },
  {
    "input": "\"What were the transfers of developed technology from IPR&D in the years 2019 and 2018 respectively?\"",
    "output": "What were the Transfers to developed technology from IPR&D in 2019 and 2018 respectively?"
  },
  {
    "input": "What was the change in the Transfers to developed technology from IPR&D from 2018 to 2019?",
    "output": "What was the difference in the amount of transfers to developed technology from the Intangible Property, Research, and Development (IPR&D) category between the years 2018 and 2019?"
  },
  {
    "input": "In which year was Transfers to developed technology from IPR&D negative?",
    "output": "In what specific year did Transfers to developed technology from IPR&D show a negative value?"
  },
  {
    "input": "What is the average Short-term restricted cash for 2018 and 2019?",
    "output": "What is the average amount of Short-term restricted cash for the years 2018 and 2019?"
  },
  {
    "input": "What was the average Net cash provided by operating activities from December 31, 2018 to December 31, 2019?",
    "output": "What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?"
  },
  {
    "input": "What is the average amount of cash generated from operating activities, received or used, for the period spanning from December 31, 2018 to December 31, 2019?",
    "output": "What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?"
  },
  {
    "input": "What was the respective Net cash used in investing activities in 2019 and 2018?",
    "output": "What were the net amounts of cash used in investing activities for the years 2019 and 2018 respectively?"
  },
  {
    "input": "What was the respective Net cash used in investing activities in 2019 and 2018?",
    "output": "\"What was the amount of net cash utilized in investing activities during the years 2019 and 2018?\""
  },
  {
    "input": "What was the net value of long-term debt in 2019 after deducting any related expenses or liabilities?",
    "output": "What was the Net carrying amount of long-term debt in 2019?"
  },
  {
    "input": "What was the specific amount of long-term debt after deducting all necessary expenses from the total amount in 2018?",
    "output": "What was the Net carrying amount of long-term debt in 2018?"
  },
  {
    "input": "What was the specific amount of long-term debt, after deducting any reductions or write-offs, recorded on the financial statements for the year 2018?",
    "output": "What was the Net carrying amount of long-term debt in 2018?"
  },
  {
    "input": "What is the mean value of the unamortized debt discount for the fiscal years 2018 and 2019?",
    "output": "What is the average Unamortized debt discount for 2018 and 2019?"
  },
  {
    "input": "When was the year when the net carrying amount of long-term debt was less than 210,000 thousands?",
    "output": "In which year was Net carrying amount of long-term debt less than 210,000 thousands?"
  },
  {
    "input": "What is the average working capital for the period from December 31, 2018 to December 31, 2019?",
    "output": "What is the average Working capital for December 31, 2019 to 2018?"
  },
  {
    "input": "What is the average Cash and cash equivalents for December 31, 2019 to 2018?",
    "output": "What is the average amount of Cash and cash equivalents for the period from December 31, 2018 to December 31, 2019?"
  },
  {
    "input": "What is the average Cash and cash equivalents for December 31, 2019 to 2018?",
    "output": "What is the average amount of Cash and cash equivalents as of December 31 for the years 2019 and 2018 combined?"
  },
  {
    "input": "What is the eligibility of Performance-based restricted stock to vest?",
    "output": "What criteria must be met for Performance-based restricted stock to become vested and eligible for acquisition?"
  },
  {
    "input": "What were the adjustments in 2018?",
    "output": "\"What specific adjustments, if any, occurred during the year 2018?\""
  },
  {
    "input": "When was the year when the ending balance fell below $240,000,000?",
    "output": "In which year was the ending balance less than 240,000 thousands?"
  },
  {
    "input": "What was the total amount of expenses incurred during the period from January 1 to December 31, 2018, after accounting for all deductions and offsets?",
    "output": "What was the net expense in the year ended December 31, 2018?"
  },
  {
    "input": "What was the total amount of expenditures for the financial year that concluded on December 31, 2018?",
    "output": "What was the net expense in the year ended December 31, 2018?"
  },
  {
    "input": "What was the total amount of expenses incurred during the period from January 1 to December 31, 2019?",
    "output": "What was the net expense in the year ended December 31, 2019?"
  },
  {
    "input": "What was the total expenditure for the period that concluded on December 31, 2019?",
    "output": "What was the net expense in the year ended December 31, 2019?"
  },
  {
    "input": "What was the percentage increase in the cost of services in 2019?",
    "output": "How much did Cost of services increase in 2019?"
  },
  {
    "input": "What was the percentage increase in the cost of services in the year 2019 compared to the previous year?",
    "output": "How much did Cost of services increase in 2019?"
  },
  {
    "input": "What was the Less Capital expenditures (including capitalized software) in 2019?",
    "output": "What was the total amount spent on Less Capital Expenditures, including expenditure on capitalized software, during the year 2019?"
  },
  {
    "input": "What was the Less Capital expenditures (including capitalized software) in 2019?",
    "output": "\"What was the total amount of capital expenditures, including capitalized software, that occurred in the year 2019?\""
  },
  {
    "input": "What was the free cash flow in 2019?",
    "output": "What was the amount of free cash flow generated in the year 2019?"
  },
  {
    "input": "What was the average free cash flow for 2018 and 2019?",
    "output": "What was the average amount of free cash flow generated for the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in the Less Capital expenditures from 2018 to 2019?",
    "output": "What was the percentage difference in the amount spent on capital expenditures between 2018 and 2019?"
  },
  {
    "input": "What was the exact total value of the assets owned by a particular entity/company during the year 2019?",
    "output": "What was the Total assets in 2019?"
  },
  {
    "input": "What was the total value of assets in the year 2019?",
    "output": "What was the Total assets in 2019?"
  },
  {
    "input": "What was the average amount of operating income recorded during the period from 2015 to 2019?",
    "output": "What was the average operating income for 2015-2019?"
  },
  {
    "input": "What was the change in the Per common share – basic from 2018 to 2019?",
    "output": "What was the exact change in the Per common share – basic value from 2018 to 2019? Please provide the specific numerical difference between the two years."
  },
  {
    "input": "What were the amounts of Service and other revenues for the years ended December 31, 2019 and 2018?",
    "output": "How much did Service and other revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
  },
  {
    "input": "What were the respective amounts of Service and other revenues for the years ended December 31, 2019 and 2018?",
    "output": "How much did Service and other revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
  },
  {
    "input": "\"What were the respective revenue amounts generated from Wireless equipment for the years ending December 31, 2019 and 2018?\"",
    "output": "How much did Wireless equipment revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
  },
  {
    "input": "What was the total amount of revenue generated from Wireless equipment during the years ending December 31, 2019 and 2018?",
    "output": "How much did Wireless equipment revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
  },
  {
    "input": "What was the percentage change in the value of Small and Medium Businesses from 2018 to 2019?",
    "output": "What is the change in Small and Medium Business value from 2018 to 2019?"
  },
  {
    "input": "What is the Consolidated Net Income in 2019?",
    "output": "What is the Consolidated Net Income for the year 2019?"
  },
  {
    "input": "What is the Consolidated Net Income in 2019?",
    "output": "What is the exact amount of Consolidated Net Income recorded in the fiscal year of 2019?"
  },
  {
    "input": "What is the difference in Consolidated Adjusted EBITDA between 2018 and 2019?",
    "output": "What is the change in Consolidated Adjusted EBITDA from 2018 to 2019?"
  },
  {
    "input": "What was the net change in accumulated other comprehensive income for the year 2016?",
    "output": "What was the net increase to accumulated other comprehensive income in 2016?"
  },
  {
    "input": "What is the difference in the amount of projected employee benefits that a company is obligated to pay, as of 2018 compared to 2019?",
    "output": "What is the change in the projected benefit obligation from 2018 to 2019?"
  },
  {
    "input": "\"What was the specific percentage of income tax that individuals had to pay during the tax year of 2018?\"",
    "output": "What was the effective income tax rate for the period 2018?"
  },
  {
    "input": "What is the percentage difference in the effective income tax rate between 2018 and 2019?",
    "output": "What is the change in Effective income tax rate from 2018 to 2019?"
  },
  {
    "input": "What specific amount, included in the year 2018, would have a positive impact on the effective income tax rate?",
    "output": "What was the amount that would favorably affect the effective income tax rate if included in 2018?"
  },
  {
    "input": "What specific amount, included in 2017, would positively impact the effective income tax rate?",
    "output": "What was the amount that would favorably affect the effective income tax rate if included in 2017?"
  },
  {
    "input": "What specific amount, if included in the 2017 earnings, would have a positive impact on the effective income tax rate?",
    "output": "What was the amount that would favorably affect the effective income tax rate if included in 2017?"
  },
  {
    "input": "How did the Additions based on tax positions related to the current year change from 2018 to 2019?",
    "output": "What was the change in the Additions based on tax positions related to the current year from 2018 to 2019?"
  },
  {
    "input": "What was the average Additions for tax positions of prior years for 2017-2019?",
    "output": "What were the average additions made for tax positions in the previous years, specifically from 2017 to 2019?"
  },
  {
    "input": "What was the total long-term debt?",
    "output": "\"What was the precise amount of long-term debt accrued by the entity in question, taking into account all outstanding obligations over an extended period of time?\""
  },
  {
    "input": "What was the value of the long-term debt that was payable within less than 1 year?",
    "output": "What was the long-term debt less than 1 year?"
  },
  {
    "input": "What was the exact amount of the finance lease obligation in its entirety?",
    "output": "What was the total finance lease obligation?"
  },
  {
    "input": "What is the difference between the long-term debt due less than 1 year and 1 to 3 years?",
    "output": "What distinguishes long-term debt due within less than one year from long-term debt due within one to three years?"
  },
  {
    "input": "What is the difference between the long-term debt due less than 1 year and 1 to 3 years?",
    "output": "What differentiates long-term debt with a maturity of less than 1 year from debt with a maturity ranging from 1 to 3 years?"
  },
  {
    "input": "By what percentage did Consumer's total operating revenues increase in the year 2019?",
    "output": "How much did Consumer’s total operating revenues increase in 2019?"
  },
  {
    "input": "What was the percentage increase in Consumer's total operating revenues in 2019 compared to the previous year?",
    "output": "How much did Consumer’s total operating revenues increase in 2019?"
  },
  {
    "input": "What was the percentage increase in Service Revenue for the year 2019 compared to the previous year?",
    "output": "How much did Service Revenue increase in 2019?"
  },
  {
    "input": "What is the percentage change in revenue from wireless equipment sales between 2018 and 2019?",
    "output": "What is the change in Wireless equipment revenue from 2018 to 2019?"
  },
  {
    "input": "By what percentage did the consolidated revenue increase from 2018 to 2019?",
    "output": "What was the increase in the consolidated revenue from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase/decrease in consumer revenue from 2018 to 2019?",
    "output": "What was the change in the consumer revenue from 2018 to 2019?"
  },
  {
    "input": "What is the source of financing for the firm's capital spending requirements?",
    "output": "How is the capital spending requirements for the firm financed?"
  },
  {
    "input": "How does the firm obtain financing for its capital spending needs?",
    "output": "How is the capital spending requirements for the firm financed?"
  },
  {
    "input": "What was the average cash flow used in investing activities for 2018 and 2019?",
    "output": "What was the average amount of cash used in investing activities during the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in cash flow used in financing activities from 2018 to 2019?",
    "output": "What was the percentage change in cash flow utilized for financing activities between the fiscal years 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in cash flow used in financing activities from 2018 to 2019?",
    "output": "What was the exact percentage of increase or decrease in cash flow used for financing activities between 2018 and 2019?"
  },
  {
    "input": "What was the percentage difference in the cost of services between 2018 and 2019?",
    "output": "What was the change in the cost of services from 2018 to 2019?"
  },
  {
    "input": "What were the adopted standards on January 1, 2018?",
    "output": "Which Standards were Adopted on January 1, 2018?"
  },
  {
    "input": "What is the change in Accumulated other comprehensive income from December 31, 2017 to January 1, 2018?",
    "output": "What is the difference in the amount of Accumulated Other Comprehensive Income between December 31, 2017 and January 1, 2018?"
  },
  {
    "input": "What is the net change in Noncontrolling interests between December 31, 2017, and January 1, 2018?",
    "output": "What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?"
  },
  {
    "input": "What was the difference in Noncontrolling interests' value between December 31, 2017, and January 1, 2018?",
    "output": "What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?"
  },
  {
    "input": "What was the ratio of the amortized cost due within one year or less to the fair value for the same period?",
    "output": "What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?"
  },
  {
    "input": "What was the ratio between the amortized cost to be paid within one year or less and the fair value for the same period?",
    "output": "What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?"
  },
  {
    "input": "What is the specific difference between the amortized cost and fair value, at the end of five years and at the end of ten years?",
    "output": "What was the difference between the amortized cost and fair value that was due after five years through ten years?"
  },
  {
    "input": "Which specific years can be found in the table that offer additional details pertaining to the stock options of the company?",
    "output": "Which years does the table provide Additional information related to the company's stock options?"
  },
  {
    "input": "What was the specific intrinsic value associated with exercises in the year 2019?",
    "output": "What was the intrinsic value of exercises in 2019?"
  },
  {
    "input": "What were the Proceeds received from exercises in 2018?",
    "output": "What was the total amount of money received from exercises held in 2018?"
  },
  {
    "input": "What were the Proceeds received from exercises in 2018?",
    "output": "What was the total amount of money received from exercises in the year 2018?"
  },
  {
    "input": "What was the gross income in 2017, prior to deducting income taxes?",
    "output": "What was the total income before income taxes in 2017?"
  },
  {
    "input": "What was the change in foreign income before income taxes between 2017 and 2018?",
    "output": "What was the difference in the amount of income from foreign sources prior to income taxes, comparing the years 2017 and 2018? Please provide the change in figures for better understanding."
  },
  {
    "input": "What was the specific difference in foreign income, without considering income taxes, from 2018 to 2019?",
    "output": "What was the change in foreign income before income taxes between 2018 and 2019?"
  },
  {
    "input": "What were the short-term investment options available in the year 2018?",
    "output": "What were the short-term investments in 2018?"
  },
  {
    "input": "What were the types of short-term investments that were prevalent in 2018?",
    "output": "What were the short-term investments in 2018?"
  },
  {
    "input": "What years does the table provide information for total assets?",
    "output": "For which time period does the table provide data on total assets?"
  },
  {
    "input": "What years does the table provide information for total assets?",
    "output": "For which years does the table present data regarding the total assets?"
  },
  {
    "input": "What was the total debt in 2015?",
    "output": "What amount of debt, in terms of total outstanding obligations, was recorded in the year 2015?"
  },
  {
    "input": "What was the change in working capital between 2015 and 2016?",
    "output": "What was the difference in the amount of working capital between 2015 and 2016?"
  },
  {
    "input": "By what percentage did the Total stockholders' equity change from 2018 to 2019?",
    "output": "What was the percentage change in Total stockholders' equity between 2018 and 2019?"
  },
  {
    "input": "Which years does the table provide information for the funded status of the company's postretirement health care and other defined benefit plans?",
    "output": "For which specific years does the provided table present data relating to the funded status of the company's postretirement health care and other defined benefit plans?"
  },
  {
    "input": "What was the specific amount of fair value attributed to the plan assets during the year 2019?",
    "output": "What was the fair value of plan assets in 2019?"
  },
  {
    "input": "What was the fair value measurements using Level 2 for Corporate Bonds?",
    "output": "What are the fair value measurements using Level 2 for Corporate Bonds?"
  },
  {
    "input": "What was the combined value of cash, cash equivalents, and short-term investments?",
    "output": "What were the total cash, cash equivalents and short-term investments?"
  },
  {
    "input": "What was the sum of all cash, cash equivalents, and short-term investments?",
    "output": "What were the total cash, cash equivalents and short-term investments?"
  },
  {
    "input": "What was the difference in the fair value for U.S. Treasury and government debt securities between Level 1 and Level 2?",
    "output": "What was the discrepancy in the fair value of U.S. Treasury and government debt securities when classified as Level 1 and Level 2 assets?"
  },
  {
    "input": "What was the fair value of Level 2 Total cash, cash equivalents and short-term investments as a percentage of the total cash, cash equivalents and short-term investments?",
    "output": "What is the fair value, expressed as a percentage, of Level 2 Total cash, cash equivalents, and short-term investments in relation to the overall total of cash, cash equivalents, and short-term investments?"
  },
  {
    "input": "What were the distinctions between the levels of Total cash, cash equivalents, and short-term investments in terms of Level 1 and Level 2?",
    "output": "What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?"
  },
  {
    "input": "What were the disparities in the amounts of Total Cash, Cash Equivalents, and Short-Term Investments between Level 1 and Level 2?",
    "output": "What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?"
  },
  {
    "input": "On what basis was the calculation for days inventory outstanding determined?",
    "output": "What was days inventory outstanding based on?"
  },
  {
    "input": "What was the Days sales outstanding for three months ended april 2019?",
    "output": "\"What was the specific value of Days Sales Outstanding (DSO) specifically for the three-month period ending in April 2019?\""
  },
  {
    "input": "What was the change in day sales outstanding between 2018 and 2019?",
    "output": "What was the difference in the number of days it took to collect sales revenue between the years 2018 and 2019?"
  },
  {
    "input": "What was the change in day sales outstanding between 2018 and 2019?",
    "output": "What was the difference in the number of days it took to collect sales revenue between 2018 and 2019?"
  },
  {
    "input": "For how long was the days inventory outstanding higher than 20 days ?",
    "output": "How many years did days inventory outstanding exceed 20 days?"
  },
  {
    "input": "What was the total amount spent on purchases in 2018?",
    "output": "What was the Aggregate purchase price in 2018?"
  },
  {
    "input": "What was the difference in the total purchase price for all items combined in 2018 and 2019?",
    "output": "What was the change in the Aggregate purchase price between 2018 and 2019?"
  },
  {
    "input": "What was the amount of finished goods in 2019?",
    "output": "What was the specific quantity of fully completed and ready-for-sale products in the year 2019?"
  },
  {
    "input": "What was the exact quantity of inventories during the year 2018?",
    "output": "What was the amount of inventories in 2018?"
  },
  {
    "input": "What was the change in finished goods between 2018 and 2019?",
    "output": "How did the amount of finished goods change from 2018 to 2019?"
  },
  {
    "input": "What were the additions made to tax positions related to the current year in 2019, and how do they impact the overall financial situation?",
    "output": "What were the Additions based on tax positions related to the current year in 2019?"
  },
  {
    "input": "What was the change in the Balance at beginning of period between 2017 and 2018?",
    "output": "What was the difference in the starting balance from the beginning of the period in 2017 compared to 2018?"
  },
  {
    "input": "What is the overall difference in the total amount of Additions for tax positions of prior years from 2017 to 2019?",
    "output": "What was the total change in Additions for tax positions of prior years between 2017 and 2019?"
  },
  {
    "input": "What was the difference in the total expenses recorded between 2018 and 2019?",
    "output": "What was the change in expense accrued during the period between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the amount of expenses recorded from 2018 to 2019?",
    "output": "What was the change in expense accrued during the period between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in the Warranty costs incurred between 2018 and 2019?",
    "output": "What was the difference in warranty costs incurred between 2018 and 2019, expressed as a percentage change?"
  },
  {
    "input": "What were the total assets in 2018?",
    "output": "What was the total value of assets in the year 2018?"
  },
  {
    "input": "What was the percentage change in  Total deferred commissions between 2018 and 2019?",
    "output": "What was the percentage change in the total amount of deferred commissions from 2018 to 2019?"
  },
  {
    "input": "What was the final balance at the end of the specified period in the year 2018?",
    "output": "What was the balance at the end of period in 2018?"
  },
  {
    "input": "What was the final balance at the end of period in 2018? Please provide the balance from the specified period in 2018.",
    "output": "What was the balance at the end of period in 2018?"
  },
  {
    "input": "What was the change in the balance at beginning of period between 2018 and 2019?",
    "output": "What was the difference in the balance at the start of the period between the years 2018 and 2019?"
  },
  {
    "input": "For how many years did the ending balance surpass $3,500 million?",
    "output": "How many years did balance at end of period exceed $3,500 million?"
  },
  {
    "input": "How many years did interest income exceed $50 million?",
    "output": "For how many years was the interest income greater than $50 million?"
  },
  {
    "input": "How many years did interest income exceed $50 million?",
    "output": "How many years was the total interest income greater than $50 million?"
  },
  {
    "input": "What was the percentage change in net revenues between 2018 and 2019?",
    "output": "What was the exact percentage difference in net revenues from 2018 to 2019, and how can I calculate it accurately?"
  },
  {
    "input": "Which years does the table provide information for net property and equipment?",
    "output": "For which specific years is information regarding net property and equipment provided in the table?"
  },
  {
    "input": "Which years does the table provide information for net property and equipment?",
    "output": "For which specific years does the table present data regarding net property and equipment?"
  },
  {
    "input": "What was the total revenue generated from operational activities during the year 2017?",
    "output": "What was the income from operations in 2017?"
  },
  {
    "input": "What was the percentage change in the net income between 2018 and 2019?",
    "output": "What was the precise percentage increase or decrease in the net income from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the net income between 2018 and 2019?",
    "output": "What is the percentage difference in the net income from 2018 to 2019?"
  },
  {
    "input": "How much money was in circulation in the form of physical cash in the year 2019?",
    "output": "What was the amount of cash in 2019?"
  },
  {
    "input": "In 2019, how much cash was there?",
    "output": "What was the amount of cash in 2019?"
  },
  {
    "input": "What was the specific value or quantity of cash equivalents recorded in the financial records for the year 2018?",
    "output": "What was the amount of cash equivalents in 2018?"
  },
  {
    "input": "Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?",
    "output": "For which specific years does the table provide information related to property and equipment in geographic areas, considering the assets' physical locations?"
  },
  {
    "input": "Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?",
    "output": "For which years does the table present property and equipment information? This information is specifically related to geographic areas, and is based on the physical location of the assets."
  },
  {
    "input": "What was the total combined value of property and equipment in the United States during the year 2019?",
    "output": "What was the amount of property and equipment in U.S. in 2019?"
  },
  {
    "input": "How many years did International property and equipment exceed $150 million?",
    "output": "For how many consecutive years has the value of International property and equipment been greater than $150 million?"
  },
  {
    "input": "What was the financed unearned services revenue in 2019?",
    "output": "\"What was the amount of unearned revenue from financed services in 2019?\""
  },
  {
    "input": "What was the financed unearned services revenue in 2019?",
    "output": "What was the specific amount of unearned services revenue that was financed during the year 2019?"
  },
  {
    "input": "For how many years has the amount of financed unearned services revenue been greater than $100 million?",
    "output": "How many years did financed unearned services revenue exceed $100 million?"
  },
  {
    "input": "For how many consecutive years has the unearned services revenue in financed exceeded $100 million?",
    "output": "How many years did financed unearned services revenue exceed $100 million?"
  },
  {
    "input": "What was the change in deferred services revenue between 2018 and 2019?",
    "output": "What was the difference in the amount of deferred services revenue recorded in 2018 and 2019?"
  },
  {
    "input": "What was the sum of the net revenues from the last two quarters?",
    "output": "What is the total net revenue for the most recent two quarters?"
  },
  {
    "input": "What was the change in the Dividends per share declared between 2018 and 2019?",
    "output": "What was the difference in the amount of Dividends per share declared between the years 2018 and 2019?"
  },
  {
    "input": "What was the change in the Dividends per share declared between 2018 and 2019?",
    "output": "What was the difference in the amount of dividends per share declared for the years 2018 and 2019?"
  },
  {
    "input": "What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?",
    "output": "What was the difference in the amounts of dividends allocated to retained earnings (accumulated deficit) from 2017 to 2018?"
  },
  {
    "input": "What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?",
    "output": "What was the variation in the amount of dividends transferred to retained earnings (accumulated deficit) from 2017 to 2018?"
  },
  {
    "input": "What factors contributed to the rise in the cash inflows generated from day-to-day business operations during the year 2019?",
    "output": "What caused the increase in the cash flow from operating activities in 2019?"
  },
  {
    "input": "What is the percentage change in the Free Cash Flow, considering both an increase and decrease in value?",
    "output": "What is the increase / (decrease) in the Free Cash Flow?"
  },
  {
    "input": "What is the percentage change in Free Cash Flow?",
    "output": "What is the increase / (decrease) in the Free Cash Flow?"
  },
  {
    "input": "What are the definitions of underlying operating profit and underlying operating margin, and how do they differ from each other?",
    "output": "What is the definition of the Underlying operating profit and underlying operating margin?"
  },
  {
    "input": "What does the underlying effective tax rate measure reflects?",
    "output": "What is the definition and significance of the underlying effective tax rate and what does it reflect in terms of a company's financial situation or performance?"
  },
  {
    "input": "What does the underlying effective tax rate measure reflects?",
    "output": "What is the meaning of the underlying effective tax rate measure and what does it reflect?"
  },
  {
    "input": "What is the methodology used to calculate the effective tax rate?",
    "output": "How is the underlying effective tax rate calculated?"
  },
  {
    "input": "What is the exact difference in the underlying effective tax rate between 2018 and 2019?",
    "output": "What is the Underlying effective tax rate change from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in the underlying effective tax rate between 2018 and 2019?",
    "output": "What is the Underlying effective tax rate change from 2018 to 2019?"
  },
  {
    "input": "By what percentage did the Biogas production increase from the year 2018 to 2019?",
    "output": "What is the increase in the Biogas from 2018 to 2019?"
  },
  {
    "input": "What is the average total energy?",
    "output": "What is the average amount of energy, when considering all types of energy combined?"
  },
  {
    "input": "\"What is the percentage change in the price of Fuel Oils from 2018 to 2019? Please specify if it is an increase or decrease.\"",
    "output": "What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in the price of Fuel Oils between 2018 and 2019?",
    "output": "What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?"
  },
  {
    "input": "What method is used to calculate ROIC (Return on Invested Capital)? Please provide a detailed explanation of the calculation process.",
    "output": "How is ROIC calculated?"
  },
  {
    "input": "What is the difference in the diluted combined average number of share units (in millions) between 2018 and 2019?",
    "output": "What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?"
  },
  {
    "input": "What is the difference in the diluted combined average number of share units (measured in millions) between 2018 and 2019?",
    "output": "What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?"
  },
  {
    "input": "What was the specific price quotation for investments on 31 December 2018?",
    "output": "What was the price quotation for investments as of 31 December 2018?"
  },
  {
    "input": "What was the average value of investments made by listed companies in the year 2019?",
    "output": "What is the average amount of investments of listed companies in 2019?"
  },
  {
    "input": "What was the average investment amount of listed companies in 2019?",
    "output": "What is the average amount of investments of listed companies in 2019?"
  },
  {
    "input": "What was the average percentage of ownership or voting rights held by shareholders in listed companies during the year 2019?",
    "output": "What is the average Percentage of ownership or voting rights of listed companies in 2019?"
  },
  {
    "input": "What is the average Unsecured other loans?",
    "output": "What is the average amount of unsecured loans that fall under the category of \"other loans\"?"
  },
  {
    "input": "What is the average value or rating of Integrated Device Manufacturers (IDMs)?",
    "output": "What is the average of Integrated device manufacturers?"
  },
  {
    "input": "What is the average performance or profitability of Integrated device manufacturers?",
    "output": "What is the average of Integrated device manufacturers?"
  },
  {
    "input": "What is the increase / (decrease) in the Integrated device manufacturers from 2017 to 2018?",
    "output": "What is the net change, either increase or decrease, in the number of Integrated device manufacturers from 2017 to 2018?"
  },
  {
    "input": "What is the average amount of compensation received by Directors?",
    "output": "What is the average Directors’ compensation?"
  },
  {
    "input": "What is the percentage change in compensation for Directors from the year 2018 to 2019?",
    "output": "What is the increase/ (decrease) in Directors’ compensation from 2018 to 2019?"
  },
  {
    "input": "What calculation method is employed to determine the total sum of the Company's shares?",
    "output": "What method is used to calculate the aggregate amount of the Company’s share?"
  },
  {
    "input": "What specific method is employed to calculate the total combined value of the shares held by the Company?",
    "output": "What method is used to calculate the aggregate amount of the Company’s share?"
  },
  {
    "input": "What is the average revenue generated from the sale of goods and services from December 31, 2018 to December 31, 2019?",
    "output": "What are the average Sales of goods and services for December 31, 2018 to 2019?"
  },
  {
    "input": "What is the increase/ (decrease) in Sales of goods and services for December 31, 2018 to 2019?",
    "output": "What is the percentage change in the sales of goods and services when comparing December 31, 2018, to December 31, 2019?"
  },
  {
    "input": "What is the percentage change in the sales of goods and services from January 1, 2018, to December 31, 2018?",
    "output": "What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?"
  },
  {
    "input": "What is the percentage change in the sales of goods and services from January 1, 2018 to December 31, 2018?",
    "output": "What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?"
  },
  {
    "input": "\"What is the average amount of lease obligation for the period Less than 1 Year and 1-3 Years? Please provide the average lease obligations separately for both time periods.\"",
    "output": "What is the average lease obligation for the period Less than 1 Year and 1-3 Years?"
  },
  {
    "input": "What does Purchase obligations incorporate?",
    "output": "What is included in purchase obligations?"
  },
  {
    "input": "What is the criteria for an employee to receive additional bonuses?",
    "output": "What are the specific requirements or factors that determine if an employee is eligible to receive extra bonuses in addition to their regular compensation?"
  },
  {
    "input": "What is the difference in Time Deposits: Non-Trading Purpose between Carrying Amount and Fair Amount?",
    "output": "What is the distinction between the carrying amount and fair amount of Time Deposits used for non-trading purposes?"
  },
  {
    "input": "What is the difference between Bonds: Non-Trading Purpose Carrying Amount and Fair Amount?",
    "output": "What distinguishes Bonds: Non-Trading Purpose Carrying Amount from Fair Amount in terms of their meaning, significance, and financial implications?"
  },
  {
    "input": "What is the average quantity of supplies and spare parts in general?",
    "output": "What was the average Supplies and spare parts?"
  },
  {
    "input": "What was the average quantity of supplies and spare parts used?",
    "output": "What was the average Supplies and spare parts?"
  },
  {
    "input": "What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?",
    "output": "What was the percentage change in the amount of Finished goods from the year 2018 to 2019, indicating whether it increased or decreased?"
  },
  {
    "input": "What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?",
    "output": "What was the percentage change in the value of Finished goods from 2018 to 2019, indicating whether it increased or decreased?"
  },
  {
    "input": "What is the percentage change in the value of financial assets at fair value through profit or loss between 2018 and 2019?",
    "output": "What is the increase / (decrease) in the Financial assets at fair value through profit or loss from 2018 to 2019?"
  },
  {
    "input": "What is the increase / (decrease) in the Short-term loans from 2018 to 2019?",
    "output": "What was the percentage change in Short-term loans from 2018 to 2019?"
  },
  {
    "input": "What is the increase / (decrease) in the Short-term loans from 2018 to 2019?",
    "output": "What was the percentage change in Short-term loans between 2018 and 2019?"
  },
  {
    "input": "What measures does the company implement to effectively manage, maintain, or adapt its capital structure in response to changes in the business environment?",
    "output": "What steps does the company take To maintain or adjust the capital structure?"
  },
  {
    "input": "What is the change in the total liabilities, expressed as a percentage, between 2018 and 2019?",
    "output": "What is the increase / (decrease) in the Total liabilities from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in Total Capital from 2018 to 2019?",
    "output": "What is the percentage increase / (decrease) of Total Capital from 2018 to 2019?"
  },
  {
    "input": "How do non-GAAP measures assist investors in making informed investment decisions and assessing a company's financial performance?",
    "output": "How do these non-GAAP measures aid investors?"
  },
  {
    "input": "How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?",
    "output": "What would be the diluted earnings per share for fiscal year 2018 if we exclude the net charge (benefit) associated with the enactment of the Tax Cuts and Jobs Act (TCJA)?"
  },
  {
    "input": "How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?",
    "output": "What would be the diluted earnings per share for fiscal year 2018, excluding the impact of the net charge (benefit) resulting from the implementation of the Tax Cuts and Jobs Act (TCJA)?"
  },
  {
    "input": "What was the average revenue over the 3 year period from 2017 to 2019?",
    "output": "What was the average revenue from 2017 to 2019, calculated over the span of three years?"
  },
  {
    "input": "What factors contributed to the change in the federal statutory rate during fiscal year 2017?",
    "output": "Why did the federal statutory rate in fiscal year 2017? "
  },
  {
    "input": "How many items accounted for the difference between income taxes computed at the US federal statutory rate and the company's effective rate?",
    "output": "How many specific items contributed to the variance between the income taxes calculated using the official US federal statutory rate and the actual rate paid by the company?"
  },
  {
    "input": "What was the average federal statutory rate over the 3 year period from 2017 to 2019? ",
    "output": "What was the average federal statutory rate from 2017 to 2019?"
  },
  {
    "input": "What was the average effective rate for the years 2017, 2018, and 2019?",
    "output": "What was the average effective rate over the 3 year period from 2017 to 2019? "
  },
  {
    "input": "How much were the top 3 components of property and equipment as a % of the total at cost, property and equipment for 2019?",
    "output": "What were the percentages for the top 3 components of property and equipment in relation to the total at cost, property, and equipment in 2019?"
  },
  {
    "input": "What percentage of the overall revenue in 2019 was not generated by the commercial cloud revenue?",
    "output": "How much of the total revenue in 2019 did not come from commercial cloud revenue?"
  },
  {
    "input": "Which were the bottom 2 revenue items for 2017?",
    "output": "\"What were the two lowest revenue-generating items in 2017?\""
  },
  {
    "input": "What was the average EPS (earnings per share) for the years 2017, 2018, and 2019?",
    "output": "What was the average basic earnings per share over the 3 year period from 2017 to 2019?"
  },
  {
    "input": "\"Which specific locations does the company have both operating leases and finance leases in place?\"",
    "output": "Which places does the company have operating and finance leases?"
  },
  {
    "input": "What is the topic or content covered in note 15?",
    "output": "What is note 15 about?"
  },
  {
    "input": "\"What is the ascending order of the years when sorted by their total cost of finance leases?\"",
    "output": "What are the years sorted by total finance lease cost, in ascending order?"
  },
  {
    "input": "How many different items are included when calculating operating income (loss)?",
    "output": "How many items are there for operating income (loss)?"
  },
  {
    "input": "How many different items are included in the calculation of operating income (loss)?",
    "output": "How many items are there for operating income (loss)?"
  },
  {
    "input": "How is the allocation of amortization and depreciation determined and distributed?",
    "output": "How is the portion of amortization and depreciation allocated?"
  },
  {
    "input": "How is the allocation of the portion of amortization and depreciation determined and distributed?",
    "output": "How is the portion of amortization and depreciation allocated?"
  },
  {
    "input": "How many countries have Long-lived assets classified based on the controlling statutory company's location, excluding financial instruments and tax assets? Also, can you identify the countries where the assets amount to over 10% of the total assets?",
    "output": "How many countries have Long-lived assets, excluding financial instruments and tax assets, classified by the location of the controlling statutory company and with countriesover 10% of the total assets?"
  },
  {
    "input": "What was the percentage change in the total long-lived assets from 2017 to 2018?",
    "output": "What was the precise percentage difference in the value of total long-lived assets between the years 2017 and 2018?"
  },
  {
    "input": "What was the percentage change in the total long-lived assets from 2017 to 2018?",
    "output": "What was the percentage increase or decrease in the value of the total long-lived assets between the years 2017 and 2018?"
  },
  {
    "input": "What is the definition and significance of purchase commitments?",
    "output": "What do the purchase commitments represent?"
  },
  {
    "input": "What is the meaning and significance of purchase commitments?",
    "output": "What do the purchase commitments represent?"
  },
  {
    "input": "Where can the long-term debt be found?",
    "output": "Where is the location to find the information regarding long-term debt?"
  },
  {
    "input": "How much do the top 3 contractual obligation terms add up to in 2020?",
    "output": "What is the total value of the top three contractual obligation terms in 2020?"
  },
  {
    "input": "How much do the top 3 contractual obligation terms add up to in 2020?",
    "output": "In 2020, what is the sum of the top three contractual obligation terms?"
  },
  {
    "input": "What was the number of stock awards that were granted in 2019, not including the PSUs granted?",
    "output": "Excluding the PSUs, how many stock awards were granted in the year 2019?"
  },
  {
    "input": "What was the number of stock awards that were granted in 2019, not including the PSUs granted?",
    "output": "How many stock awards were granted in 2019, excluding the granted PSUs?"
  },
  {
    "input": "What was the average fair value of the stock awards that were vested between 2017 and 2019 over a three-year period?",
    "output": "What was the average fair value of stock awards vested over the 3 year period from 2017 to 2019?"
  },
  {
    "input": "What was the intelligent cloud as a percentage of total revenue in 2019?",
    "output": "What percentage of the total revenue in 2019 was attributed to the intelligent cloud?"
  },
  {
    "input": "What was the intelligent cloud as a percentage of total revenue in 2019?",
    "output": "What percentage of total revenue in 2019 was attributed to the intelligent cloud?"
  },
  {
    "input": "How many revenue categories are there?",
    "output": "How many different categories of revenue exist in the given context?"
  },
  {
    "input": "What is the company's net loss in 2019?",
    "output": "What was the net financial loss incurred by the company during the year 2019?"
  },
  {
    "input": "What was the cumulative unrealized gain on marketable securities for the company from 2017 to 2019?",
    "output": "What is the company's total unrealized gain on marketable securities between 2017 to 2019?"
  },
  {
    "input": "What is the percentage difference in the total stock base compensation, categorized by type of award, from 2018 to 2019?",
    "output": "What is the percentage change in total stock base compensation  by type of award between 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in the total stock base compensation for each type of award from 2018 to 2019?",
    "output": "What is the percentage change in total stock base compensation  by type of award between 2018 and 2019?"
  },
  {
    "input": "What is the cumulative amount of stock based compensation from 2017 through 2019?",
    "output": "What is the total stock based compensation amount between 2017 to 2019?"
  },
  {
    "input": "What is the sum of stock-based compensation expenses for the years 2017 to 2019?",
    "output": "What is the total stock based compensation amount between 2017 to 2019?"
  },
  {
    "input": "As of December 31, 2018, how many shares were granted and are currently outstanding?",
    "output": "What is the number of outstanding shares granted as of December 31, 2018? "
  },
  {
    "input": "As of December 31, 2018, how many shares were granted and still remain outstanding?",
    "output": "What is the number of outstanding shares granted as of December 31, 2018? "
  },
  {
    "input": "What was the change in the number of outstanding shares between December 31, 2018, and December 31, 2019?",
    "output": "What is the difference in outstanding shares as of December 31, 2018 and 2019?"
  },
  {
    "input": "What data is shown in the table?",
    "output": "\"What specific information does the table display?\""
  },
  {
    "input": "What is the units for the data shown in the table?",
    "output": "What units are being used to represent the information presented in the table?"
  },
  {
    "input": "What is the total revenue earned by the company between 2017 to 2019?",
    "output": "What is the cumulative revenue generated by the company from 2017 to 2019?"
  },
  {
    "input": "What is the exact amount of revenue generated in the EMEA region from 2017 to 2019?",
    "output": "What is the total revenue earned in EMEA between 2017 to 2019?"
  },
  {
    "input": "What is the change in total revenue between 2018 to 2019?",
    "output": "What is the difference in total revenue from 2018 compared to 2019, and how can this information assist in better understanding the financial performance of the company during this period?"
  },
  {
    "input": "What is the sum of all non-current liabilities excluding total other non-current liabilities as of 31 December 2019?",
    "output": "What is the total other non-current liabilities as at 31 December 2019?"
  },
  {
    "input": "What is the aggregate amount of all non-current liabilities, excluding any current liabilities, as of December 31, 2019?",
    "output": "What is the total other non-current liabilities as at 31 December 2019?"
  },
  {
    "input": "What is the depreciation expense on property and equipment as at December 31, 2018?",
    "output": "What is the specific amount of the depreciation expense recorded for property and equipment on the financial statement as at the closing date of December 31, 2018?"
  },
  {
    "input": "What is the depreciation expense on property and equipment as at December 31, 2018?",
    "output": "What was the amount of depreciation expense booked for property and equipment up until December 31st, 2018?"
  },
  {
    "input": "What is the depreciation expense on property and equipment as at December 31, 2019?",
    "output": "As of December 31, 2019, what is the recorded depreciation expense amount for property and equipment?"
  },
  {
    "input": "What is the depreciation expense on property and equipment as at December 31, 2019?",
    "output": "What is the specific amount of depreciation expense recorded for property and equipment as on December 31, 2019?"
  },
  {
    "input": "What is the total depreciation expense on property and equipment from 2017 to 2019?",
    "output": "From 2017 to 2019, what was the cumulative amount of depreciation expense incurred on property and equipment?"
  },
  {
    "input": "What is the total depreciation expense on property and equipment from 2017 to 2019?",
    "output": "What is the cumulative depreciation expense on property and equipment for the years 2017, 2018, and 2019?"
  },
  {
    "input": "What is the allowance for doubtful accounts as at December 31, 2018?",
    "output": "What is the amount set aside for doubtful accounts on the balance sheet as of December 31, 2018?"
  },
  {
    "input": "What are some examples of financial instruments that can subject the company to credit risk?",
    "output": "What financial instruments expose companies to credit risk? Can you provide some examples of such instruments?"
  },
  {
    "input": "What are some examples of financial instruments that can subject the company to credit risk?",
    "output": "What financial instruments can expose a company to credit risk? Can you provide some examples?"
  },
  {
    "input": "How does the company effectively manage and reduce its credit risk associated with accounts receivable?",
    "output": "How does the company mitigate its credit risk pertaining to accounts receivable?"
  },
  {
    "input": "How does the company identify and determine its important or major customers in its business operations?",
    "output": "How does the company determine its significant customers?"
  },
  {
    "input": "How does the company identify and assess its significant customers for business purposes?",
    "output": "How does the company determine its significant customers?"
  },
  {
    "input": "What was the monetary value of the company's losses within its own country during the year 2019?",
    "output": "What is the company's domestic loss in 2019?"
  },
  {
    "input": "\"What was the total financial loss incurred by the company within its own country during the year 2019?\"",
    "output": "What is the company's domestic loss in 2019?"
  },
  {
    "input": "What is the amount of money that the company lost within its own country during the year 2017?",
    "output": "What is the company's domestic loss in 2017?"
  },
  {
    "input": "\"What was the fiscal year 2017 domestic financial loss incurred by the company?\"",
    "output": "What is the company's domestic loss in 2017?"
  },
  {
    "input": "What is the company's total loss before income taxes between 2017 to 2019?",
    "output": "What is the cumulative pre-tax loss of the company from 2017 to 2019?"
  },
  {
    "input": "What is the company's change in foreign income between 2018 and 2019?",
    "output": "What was the percentage change in the company's foreign income from 2018 to 2019?"
  },
  {
    "input": "What is the cumulative amount of loss incurred by the company domestically during the period from 2017 to 2019?",
    "output": "What is the company's total domestic loss between 2017 to 2019?"
  },
  {
    "input": "What is the aggregate amount of losses incurred by the company within its domestic markets during the period from 2017 to 2019?",
    "output": "What is the company's total domestic loss between 2017 to 2019?"
  },
  {
    "input": "What is the percentage change in total deferred revenue 2018 and 2019?",
    "output": "What is the percentage difference in the total deferred revenue between 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in total deferred revenue 2018 and 2019?",
    "output": "What is the percentage difference in the total amount of deferred revenue between the years 2018 and 2019?"
  },
  {
    "input": "What is the proportion of the cost of revenue and research and development expenses as a percentage of the total restructuring expense?",
    "output": "What is the ratio of the combined cost of revenue and research and development expenses to the total restructuring expense, expressed as a percentage?"
  },
  {
    "input": "What is the total legal fees and facilities expenses?",
    "output": "What are the combined expenses for legal fees and facilities?"
  },
  {
    "input": "What is the total legal fees and facilities expenses?",
    "output": "What is the cumulative sum of the expenses incurred in legal fees and facilities?"
  },
  {
    "input": "What is the total increases related to tax positions taken during current year in 2018 and 2019?",
    "output": "What is the cumulative increase in tax positions taken during the current year across 2018 and 2019?"
  },
  {
    "input": "What is the total increases related to tax positions taken during current year in 2018 and 2019?",
    "output": "What is the combined amount of increments in relation to tax positions that were assumed during the present year in both 2018 and 2019?"
  },
  {
    "input": "As of 31st December 2018, what is the sum of all liabilities that have accumulated or been incurred by the company up to that date?",
    "output": "What is the total accrued liabilities as at 31 December 2018?"
  },
  {
    "input": "What are the units used to measure the values in the table?",
    "output": "What is the units that the values in the table are measured in?"
  },
  {
    "input": "What is the percentage increase or decrease in the total amount of accrued liabilities from 2018 to 2019?",
    "output": "What is the percentage change in total accrued liabilities between 2018 and 2019?"
  },
  {
    "input": "What is the percentage increase or decrease in the total amount of liabilities that have accumulated between the years 2018 and 2019?",
    "output": "What is the percentage change in total accrued liabilities between 2018 and 2019?"
  },
  {
    "input": "What is the total amount of long-lived asset in Japan in both 2018 and 2019?",
    "output": "What is the total value of long-lived assets in Japan for the years 2018 and 2019 combined?"
  },
  {
    "input": "What were the capitalized costs of software development in 2019?",
    "output": "What was the Capitalized software development costs in 2019?"
  },
  {
    "input": "What were the average net costs of software development for both the years 2018 and 2019?",
    "output": "What was the average Software development costs, net for 2018 and 2019?"
  },
  {
    "input": "What were the net average costs for software development in both 2018 and 2019?",
    "output": "What was the average Software development costs, net for 2018 and 2019?"
  },
  {
    "input": "What was the Cash and cash equivalents in 2019 and 2018 respectively?",
    "output": "What were the cash and cash equivalents figures for the years 2019 and 2018, respectively?"
  },
  {
    "input": "What was the Cash and cash equivalents in 2019 and 2018 respectively?",
    "output": "What were the amounts of Cash and cash equivalents for the years 2019 and 2018, respectively?"
  },
  {
    "input": "What are the average amounts of Restricted cash for the years 2018 and 2019?",
    "output": "What is the average Restricted cash for 2018 and 2019?"
  },
  {
    "input": "What was the Compensation and related benefits in 2019 and 2018 respectively?",
    "output": "What were the compensation and benefits offered in 2019 and 2018?"
  },
  {
    "input": "\"In what specific year did the expenses for professional and legal services amount to a value below 500 thousand dollars?\"",
    "output": "In which year was Professional and legal fees less than 500 thousands?"
  },
  {
    "input": "What was the percentage change in Compensation and related benefits from 2018 to 2019?",
    "output": "What is the change in the Compensation and related benefits from 2018 to 2019?"
  },
  {
    "input": "What were the average professional and legal fees in 2018 and 2019?",
    "output": "What was the average Professional and legal fees for 2018 and 2019?"
  },
  {
    "input": "What were the factors that led to the decrease in net pool allocation? Can you provide an explanation and further details regarding the decline in net pool allocation?",
    "output": "What accounted for the decrease in net pool allocation?"
  },
  {
    "input": "What are the components of vessel operating and supervision costs?",
    "output": "What are the key components that make up the costs associated with vessel operations and supervision?"
  },
  {
    "input": "How much did the technical maintenance expenses differ between 2017 and 2018?",
    "output": "What was the change in technical maintenance expenses from 2017 to 2018?"
  },
  {
    "input": "What was the exact monetary difference in technical maintenance expenses between the years 2017 and 2018?",
    "output": "What was the change in technical maintenance expenses from 2017 to 2018?"
  },
  {
    "input": "What was the percentage increase or decrease in the combined costs of operating and supervising vessels from 2018 to 2019?",
    "output": "What was the percentage change in total vessel operating and supervision costs from 2018 to 2019?"
  },
  {
    "input": "Why does the Group choose to retain ownership of the trade receivables instead of selling them or assigning them to a third party?",
    "output": "Why does the Group hold the trade receivables?"
  },
  {
    "input": "What does accrued income represent?",
    "output": "\"What is the meaning and significance of accrued income?\""
  },
  {
    "input": "What does accrued income represent?",
    "output": "What is the meaning and significance of accrued income?"
  },
  {
    "input": "What was the change in trade receivables from 2018 to 2019?",
    "output": "What was the difference in the amount of trade receivables between 2018 and 2019, indicating whether it increased or decreased?"
  },
  {
    "input": "What was the exact percentage increase or decrease in the total amount of money owed to a company from 2018 to 2019?",
    "output": "What was the percentage change in total receivables from 2018 to 2019?"
  },
  {
    "input": "In which years was the fair value of the derivative assets recorded for?",
    "output": "For which specific years were the fair values of the derivative assets recorded? Please provide all relevant information regarding the years for accurate response."
  },
  {
    "input": "In which specific year did the fair value of interest rate swaps experience an increase compared to other years?",
    "output": "In which year was the fair value of  Interest rate swaps  higher?"
  },
  {
    "input": "When was the fair value of interest rate swaps higher: in which specific year did the fair value of interest rate swaps surpass that of other years?",
    "output": "In which year was the fair value of  Interest rate swaps  higher?"
  },
  {
    "input": "In which year did the company record the highest amount of expenses pertaining to share-based compensation?",
    "output": "In which year was the total expense recognized in respect of share-based compensation the highest?"
  },
  {
    "input": "What was the highest year in terms of total recognized expense for share-based compensation?",
    "output": "In which year was the total expense recognized in respect of share-based compensation the highest?"
  },
  {
    "input": "What was the difference in the amount of money earned by The Cool Pool Limited in 2017 compared to 2018?",
    "output": "What was the change in revenues from The Cool Pool Limited  from 2017 to 2018?"
  },
  {
    "input": "What was the exact difference in the amount of money earned by The Cool Pool Limited in the year 2017 compared to the year 2018?",
    "output": "What was the change in revenues from The Cool Pool Limited  from 2017 to 2018?"
  },
  {
    "input": "For which years were the movements in tangible fixed assets and vessels under construction documented?",
    "output": "In which years was the movements in tangible fixed assets and vessels under construction recorded for?"
  },
  {
    "input": "What was the percentage change in the cost of vessels from the end of 2018 to the end of 2019?",
    "output": "What was the change in vessel cost from end 2018 to end 2019?"
  },
  {
    "input": "In which year was the other non-current assets recorded for?",
    "output": "What is the specific year in which the recording of non-current assets other than the current ones occurred?"
  },
  {
    "input": "In which year did the value of long-term assets, other than the current year, surpass that of the current year?",
    "output": "In which year was the other long-term assets higher?"
  },
  {
    "input": "What was the percentage change in the total value of other non-current assets from the year 2018 to the year 2019?",
    "output": "What was the percentage change in total other non-current assets from 2018 to 2019?"
  },
  {
    "input": "For which years were the earnings per share (EPS) or losses per share (LPS) recorded?",
    "output": "In which years was the earnings/(losses) per share recorded for?"
  },
  {
    "input": "What was the amount of ship management client accounts in 2018?",
    "output": "How many ship management client accounts were there in the year 2018?"
  },
  {
    "input": "What was the change in current accounts  from 2018 to 2019?",
    "output": "What is the difference between the current accounts in 2018 and 2019?"
  },
  {
    "input": "\"What was the specific amount of compensation or payment given to individuals or employees in the year 2018?\"",
    "output": "What was the remuneration in 2018?"
  },
  {
    "input": "What was the amount of compensation received in 2018?",
    "output": "What was the remuneration in 2018?"
  },
  {
    "input": "Which year was the short-term benefits the highest?",
    "output": "What was the peak year for short-term benefits?"
  },
  {
    "input": "Which year was the short-term benefits the highest?",
    "output": "In which year did the short-term benefits reach their peak level?"
  },
  {
    "input": "For which specific years were the costs of constructing the vessels recorded?",
    "output": "In which years was the vessels under construction costs recorded for?"
  },
  {
    "input": "For which years were the recorded construction costs of the vessels?",
    "output": "In which years was the vessels under construction costs recorded for?"
  },
  {
    "input": "What is the significance or meaning behind vessels that are currently being constructed?",
    "output": "What does vessels under construction represent?"
  },
  {
    "input": "In which year were the installment payments for the progress shipyard lower compared to other years?",
    "output": "Which year was the progress shipyard installments lower?"
  },
  {
    "input": "In which specific year did the progress shipyard experience a decrease in the number of installments being made?",
    "output": "Which year was the progress shipyard installments lower?"
  },
  {
    "input": "What was the difference in the average daily hire rate between 2017 and 2018?",
    "output": "What was the change in average daily hire rate from 2017 to 2018?"
  },
  {
    "input": "What is the change in pool gross revenues from 2017 to 2018?",
    "output": "What was the difference in the total revenue generated from pools in 2017 compared to 2018?"
  },
  {
    "input": "What is the change in pool gross revenues from 2017 to 2018?",
    "output": "What is the difference in total gross revenues generated by the pool in 2017 compared to 2018?"
  },
  {
    "input": "In which years was the other payables and accruals recorded for?",
    "output": "For which years was the recording of other payables and accruals made?"
  },
  {
    "input": "How many vessels were hired for employment in the year 2018?",
    "output": "How many vessels were hired in 2018?"
  },
  {
    "input": "In 2018, what was the total number of vessels that were hired?",
    "output": "How many vessels were hired in 2018?"
  },
  {
    "input": "In which year was the accrued interest higher?",
    "output": "\"In which year did the accumulated interest amount exceed the interest accrued in other years?\""
  },
  {
    "input": "What is the percentage difference in the combined value of total payables and accruals between 2018 and 2019?",
    "output": "What was the percentage change in total payables and  accruals from 2018 to 2019?"
  },
  {
    "input": "What was the difference in the ratio of a company's long-term debt to its equity from 2018 to 2019?",
    "output": "What was the change in gearing ratio from 2018 to 2019?"
  },
  {
    "input": "In 2019, how many operating days were there and what was the total revenue generated during those days?",
    "output": "What was the total revenue operating days in 2019?"
  },
  {
    "input": "How many operating days were there in 2019 and what was the total revenue generated during those days?",
    "output": "What was the total revenue operating days in 2019?"
  },
  {
    "input": "What was the change in average daily hire rate from 2018 to 2019?",
    "output": "What was the specific difference in the average rate of daily hiring between 2018 and 2019? How did the average daily hire rate change from one year to the other?"
  },
  {
    "input": "\"What was the specific monetary value of the cash distributions made in the year 2019 for common units?\"",
    "output": "What was the amount of cash distributions in 2019 for common units?"
  },
  {
    "input": "How much did the size of the ATM Programme change?",
    "output": "What was the change in size of the ATM Programme?"
  },
  {
    "input": "\"When was the specific year when the general and administrative expenses were officially documented or accounted for?\"",
    "output": "In which year was the general and administrative expenses recorded for?"
  },
  {
    "input": "Where does the management intend to relocate its employees?",
    "output": "\"What are the specific plans of the management regarding the relocation of its employees?\""
  },
  {
    "input": "What was the selling price of Methane Julia Louise?",
    "output": "How much was Methane Julia Louise sold for?"
  },
  {
    "input": "Which company was the vessel sold to?",
    "output": "To which company was the vessel sold? Please provide the name of the company that purchased the vessel."
  },
  {
    "input": "Which company was the vessel sold to?",
    "output": "\"To which company was the vessel sold?\""
  },
  {
    "input": "What was the percentage change in total right-of use assets  from start to end 2019?",
    "output": "What was the percentage change in the total value of right-of-use assets between the beginning and end of 2019?"
  },
  {
    "input": "How much was the change in the repayment amount for the bank loan?",
    "output": "By how much did the bank loan repayment change?"
  },
  {
    "input": "What year experienced a decrease in audit fees compared to previous years?",
    "output": "In which year was the audit fees lower?"
  },
  {
    "input": "What was the percentage change in audit fees between 2018 and 2019?",
    "output": "What was the change in audit fees from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in audit fees from the year 2018 to the year 2019?",
    "output": "What was the change in audit fees from 2018 to 2019?"
  },
  {
    "input": "What are the components subjected to credit risk?",
    "output": "What are the various components that are exposed to the risk of credit?"
  },
  {
    "input": "What is the significance of bunkers' consumption and other voyage expenses in relation to overall expenses for a voyage?",
    "output": "What does bunkers’ consumption and other voyage expenses represent?"
  },
  {
    "input": "What is the highest year for brokers' commissions on revenue?",
    "output": "In which year was the brokers' commissions on revenue the highest?"
  },
  {
    "input": "What was the change in bunkers’ consumption and other voyage expenses from 2018 to 2019?",
    "output": "What was the difference in the consumption of bunkers and other voyage expenses between 2018 and 2019?"
  },
  {
    "input": "What is the specific monetary value that The Cool Pool Limited is required to pay in 2018?",
    "output": "What is the amount due from The Cool Pool Limited in 2018?"
  },
  {
    "input": "In which years was the dividends receivable and other amounts due from related parties recorded for?",
    "output": "For which years were the recorded dividends receivable and other amounts due from related parties?"
  },
  {
    "input": "In which years was the dividends receivable and other amounts due from related parties recorded for?",
    "output": "In which specific years were the dividends receivable and other outstanding amounts due from related parties recorded?"
  },
  {
    "input": "In which years was the investments recorded for?",
    "output": "For which specific years were the investments recorded?"
  },
  {
    "input": "What was the numerical difference in the number of additions made between 2018 and 2019?",
    "output": "What was the change in additions from 2018 to 2019?"
  },
  {
    "input": "What was the difference in the number of additions between 2018 and 2019?",
    "output": "What was the change in additions from 2018 to 2019?"
  },
  {
    "input": "What were the specific Level 2 municipal and corporate bonds held on December 31, 2018?",
    "output": "What are the respective Level 2 municipal and corporate bonds as at December 31, 2018?"
  },
  {
    "input": "What is the value of municipal bonds as a percentage of the total marketable securities?",
    "output": "What proportion of the overall marketable securities is represented by municipal bonds?"
  },
  {
    "input": "What are the respective goodwill amounts in 2015 and 2016?",
    "output": "What were the goodwill amounts in 2015 and 2016, respectively?"
  },
  {
    "input": "What were the average amounts of total cash, cash equivalents, and marketable securities in the years 2015 and 2016?",
    "output": "What is the average total cash, cash equivalents, and marketable securities in 2015 and 2016?"
  },
  {
    "input": "What is the proportion of shares bought in November out of the total shares purchased in the last three months of 2019?",
    "output": "What percentage of the total shares purchased in the last three months of 2019 are bought in November?"
  },
  {
    "input": "What is the proportion of shares bought in November out of the total shares purchased during the last three months of 2019?",
    "output": "What percentage of the total shares purchased in the last three months of 2019 are bought in November?"
  },
  {
    "input": "What is the percentage change in revenue from term license between 2018 and 2019?",
    "output": "What is the percent increase or decrease in revenue from term license for the year 2019 compared to 2018?"
  },
  {
    "input": "What is the percentage increase or decrease in revenue generated from maintenance services from 2018 to 2019?",
    "output": "What is the percentage change in revenue from maintenance between 2018 and 2019?"
  },
  {
    "input": "What were the pre-tax losses in 2018 and 2019 for the company?",
    "output": "What are the respective loss before income tax benefits in 2018 and 2019?"
  },
  {
    "input": "What were the pre-tax domestic and foreign earnings in 2017?",
    "output": "What are the respective domestic and foreign income before income taxes in 2017?"
  },
  {
    "input": "\"What were the pre-tax incomes in 2017, separated by domestic and foreign sources?\"",
    "output": "What are the respective domestic and foreign income before income taxes in 2017?"
  },
  {
    "input": "\"What were the total losses incurred in the domestic market and international markets in 2019, excluding any income tax benefits received?\"",
    "output": "What are the respective domestic and foreign losses before income tax benefits in 2019?"
  },
  {
    "input": "What is the sum of all outstanding receivables that are yet to be invoiced, and are expected to be collected within a period of two years?",
    "output": "What is the total unbilled receivables due within 2 years?"
  },
  {
    "input": "What is the sum of all outstanding amounts receivable that have not been invoiced yet, and are expected to be collected within the next 1 to 5 years?",
    "output": "What is the total unbilled receivables due between 1 to 5 years?"
  },
  {
    "input": "What is the cumulative value of unrealized billings that are yet to be accounted for and are expected to be collected within a period ranging from 1 to 5 years?",
    "output": "What is the total unbilled receivables due between 1 to 5 years?"
  },
  {
    "input": "What is the difference between the company's federal and state net operating losses?",
    "output": "What differentiates the federal and state net operating losses of the company?"
  },
  {
    "input": "What were the amounts that the company is authorised to repurchase in 2019 and 2018 respectively?",
    "output": "What were the authorized repurchase amounts for the company in both 2019 and 2018?"
  },
  {
    "input": "What were the respective amounts of stock repurchases made by the company as at January 1, 2018 and 2019 respectively?",
    "output": "What were the specific amounts of stock repurchases made by the company as of January 1, 2018 and January 1, 2019, respectively?"
  },
  {
    "input": "What was the average amount of stock repurchases as of January 1st in both 2017 and 2018?",
    "output": "What is the average stock repurchases as at January 1, 2017 and 2018?"
  },
  {
    "input": "What is the average amount of stock repurchases as of January 1, 2017, and January 1, 2018?",
    "output": "What is the average stock repurchases as at January 1, 2017 and 2018?"
  },
  {
    "input": "What is the exact percentage difference in the amount of long-term deferred revenue from 2018 to 2019?",
    "output": "What is the percentage change in long-term deferred revenue between 2018 and 2019?"
  },
  {
    "input": "What are the respective values for Term in 2018 and 2019?",
    "output": "What are the values for Term in the year 2018? What are the values for Term in the year 2019?"
  },
  {
    "input": "What are the company's respective stock-based compensation for  cost of revenues in 2019 and 2018?",
    "output": "What were the stock-based compensation expenses for the cost of revenues incurred by the company in both 2019 and 2018?"
  },
  {
    "input": "What are the company's respective stock-based compensation for  cost of revenues in 2019 and 2018?",
    "output": "\"What were the stock-based compensation expenses for cost of revenues in 2019 and 2018 for the company?\""
  },
  {
    "input": "What is the average amount of stock-based compensation for the cost of revenue that the company has incurred from 2017 to 2019?",
    "output": "What is the company's average stock-based compensation for the cost of revenue between 2017 to 2019?"
  },
  {
    "input": "What is the average amount of stock-based compensation granted by the company for the selling and marketing department in both 2018 and 2019?",
    "output": "What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?"
  },
  {
    "input": "\"What was the average amount of stock-based compensation granted to employees in the selling and marketing department of the company in both 2018 and 2019?\"",
    "output": "What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in revenue earned from term license from 2015 to 2016?",
    "output": "What is the percentage change in the revenue earned from term license between 2015 and 2016?"
  },
  {
    "input": "What is the total revenue earned from maintenance in 2017 and 2018?",
    "output": "What is the combined revenue generated from maintenance services in the years 2017 and 2018?"
  },
  {
    "input": "What is the total revenue earned from maintenance in 2017 and 2018?",
    "output": "What was the combined revenue generated from maintenance services in the years 2017 and 2018?"
  },
  {
    "input": "How did actuarial calculations impact the resulting changes in parameters?",
    "output": "What did the changes in parameters on the basis of actuarial calculations lead to?"
  },
  {
    "input": "What was the precise amount of the actual return on plan assets in the fiscal year 2019?",
    "output": "How much did the actual return on plan assets amount to in FY2019?"
  },
  {
    "input": "What items or expenses are typically excluded from the expected payments?",
    "output": "What is not included in expected payments?"
  },
  {
    "input": "Explain what expenses or items are typically not encompassed within the estimated payments.",
    "output": "What is not included in expected payments?"
  },
  {
    "input": "What was the change in interest income in FY2019 from FY2018?",
    "output": "What was the difference in the amount of income generated from interest in the fiscal year 2019 compared to the fiscal year 2018?"
  },
  {
    "input": "What was the change in interest income in FY2019 from FY2018?",
    "output": "What was the specific difference in interest income between the fiscal year 2019 and the fiscal year 2018?"
  },
  {
    "input": "In which year did Asia experience a higher year-on-year percentage change in GDP compared to 2018 during the period from 2018 to 2019?",
    "output": "In which year was the Year-on-year percentage change of GDP in Asia larger from 2018 to 2019?"
  },
  {
    "input": "What was the change in the year-on-year percentage change in GDP for Germany from 2018 to 2019?",
    "output": "What was the difference in the percentage change of Germany's GDP from one year to the next, comparing 2018 and 2019?"
  },
  {
    "input": "In which year did the total sum of taxes collected exceed the amount collected in all other years?",
    "output": "In which year was the total amount of taxes larger?"
  },
  {
    "input": "What was the exact difference in the total amount of taxes paid in the time period from 2018 to 2019 compared to the time period from 2017 to 2018?",
    "output": "What was the change in total taxes in  2018/2019 from 2017/2018?"
  },
  {
    "input": "What was the percentage increase or decrease in the total amount of taxes in 2018/2019 compared to 2017/2018?",
    "output": "What was the percentage change in total taxes in  2018/2019 from 2017/2018?"
  },
  {
    "input": "What factors or criteria are used as a foundation for the measurements being taken?",
    "output": "What is the basis for the measurements?"
  },
  {
    "input": "What is the fundamental basis or principle used to determine the measurements being referred to in the context?",
    "output": "What is the basis for the measurements?"
  },
  {
    "input": "In which specific year did the amount for Belgium surpass or exceed the previous value?",
    "output": "In which year was the amount for Belgium larger?"
  },
  {
    "input": "In which specific year did the total amount (not specified) exceed the amount for Belgium?",
    "output": "In which year was the amount for Belgium larger?"
  },
  {
    "input": "What was the amount of cash and cash equivalents in FY2019?",
    "output": "How much cash and cash equivalents were reported in the financial statements for the fiscal year 2019?"
  },
  {
    "input": "What was the exact percentage difference in Cash and cash equivalents between FY2018 and FY2019 after considering adjustments?",
    "output": "What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?"
  },
  {
    "input": "What was the calculated percentage difference in Cash and cash equivalents between FY2019 and FY2018, after adjusting for any relevant factors?",
    "output": "What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?"
  },
  {
    "input": "In which specific year did the absolute value of the Earnings per share exceed the value in another year?",
    "output": "In which year was the absolute value of the Earnings per share larger?"
  },
  {
    "input": "\"What specific factor was used to calculate the net cost of the current service?\"",
    "output": "What was the current service cost netted against?"
  },
  {
    "input": "\"In which specific year did the total sales exceed the sales of other years in terms of maximum revenue generated?\"",
    "output": "In which year was the Total sales larger?"
  },
  {
    "input": "In what year did the headcount in METRO Asia exceed the corresponding headcount in other years?",
    "output": "In which year was the amount in METRO Asia headcount larger?"
  },
  {
    "input": "What was the year-over-year change in the number of employees at METRO AG from 2018 to 2019?",
    "output": "What was the change in METRO AG headcount in 2019 from 2018?"
  },
  {
    "input": "With which companies did METRO maintain business relations with, and who were the parties involved in these relations?",
    "output": "Who were the parties to which METRO maintained business relations with related companies?"
  },
  {
    "input": "What were the business relations to related companies listed in the table?",
    "output": "What were the business relations between the listed companies in the table and other related entities or organizations?"
  },
  {
    "input": "What was the difference in the amount and type of services received in the fiscal year 2019 compared to the fiscal year 2018?",
    "output": "What was the change in services received in FY2019 from FY2018?"
  },
  {
    "input": "What is the exact percentage change in the number of services received during fiscal year 2019 compared to fiscal year 2018?",
    "output": "What was the percentage change in services received in FY2019 from FY2018?"
  },
  {
    "input": "What was the exact percentage difference in the quantity of services received during the fiscal year 2019 compared to the fiscal year 2018?",
    "output": "What was the percentage change in services received in FY2019 from FY2018?"
  },
  {
    "input": "In what format is the cash flow statement presented in the table?",
    "output": "What version is the cash flow statement in the table in?"
  },
  {
    "input": "What specific expenses related to income tax were officially acknowledged in the year 2019?",
    "output": "What are the recognised income tax expenses in 2019?"
  },
  {
    "input": "What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?",
    "output": "What was the percentage increase or decrease in recognised income tax expenses for the financial year 2018/2019 compared to the financial year 2017/2018?"
  },
  {
    "input": "What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?",
    "output": "What was the percentage increase or decrease in recognised income tax expenses for the fiscal year 2018/2019 compared to the fiscal year 2017/2018?"
  },
  {
    "input": "What was the change in EBT in 2018/2019 from 2017/2018?",
    "output": "What was the difference in EBT (Earnings Before Taxes) between 2018 and 2019 when compared to 2017 and 2018?"
  },
  {
    "input": "What does the other operating income include?",
    "output": "What type of income is included under \"other operating income\"? Please provide a comprehensive explanation of the various sources that fall under this category."
  },
  {
    "input": "In which specific year did the amount of Services surpass the quantity in other years?",
    "output": "In which year was the amount of Services larger?"
  },
  {
    "input": "During which year did the quantity of services surpass the quantity in previous years?",
    "output": "In which year was the amount of Services larger?"
  },
  {
    "input": "What was the exact amount of equity during the fiscal year 2019?",
    "output": "What was the amount of equity in FY2019?"
  },
  {
    "input": "What proportion of the total earnings in FY2019 can be attributed to the Noncontrolling interests?",
    "output": "How much of earnings does the Noncontrolling interests account for in FY2019?"
  },
  {
    "input": "What proportion of the FY2019 earnings is represented by the Noncontrolling interests account?",
    "output": "How much of earnings does the Noncontrolling interests account for in FY2019?"
  },
  {
    "input": "What was the exact numerical difference in sales between FY2019 and FY2018?",
    "output": "What was the change in Sales in FY2019 from FY2018?"
  },
  {
    "input": "What was the difference in the total amount of sales between fiscal year 2019 and fiscal year 2018?",
    "output": "What was the change in Sales in FY2019 from FY2018?"
  },
  {
    "input": "When were the calculations of employee numbers by segments performed?",
    "output": "When were the employee numbers by segments calculated?"
  },
  {
    "input": "What were the specific segments or categories mentioned under the \"METRO\" component in the table that accounted for the number of employees?",
    "output": "What were the components under METRO in the table when accounting for the employee numbers by segments?"
  },
  {
    "input": "What was the change in METRO AG in 2019 from 2018?",
    "output": "What was the year-on-year change in METRO AG's performance in 2019 compared to 2018?"
  },
  {
    "input": "What was the percentage difference in METRO AG's performance in 2019 compared to 2018?",
    "output": "What was the percentage change in METRO AG in 2019 from 2018?"
  },
  {
    "input": "What was the percentage difference in the performance of METRO AG in 2019 compared to 2018?",
    "output": "What was the percentage change in METRO AG in 2019 from 2018?"
  },
  {
    "input": "In which year was the amount of total assets larger?",
    "output": "\"When comparing the amount of total assets, which year had a larger value?\""
  },
  {
    "input": "What was the percentage change in total assets in FY2019 from FY2018?",
    "output": "What was the percentage increase or decrease in the total assets for the fiscal year 2019 compared to the total assets for the fiscal year 2018?"
  },
  {
    "input": "What is the source of revenue that compensates for the expenses incurred in providing logistics services?",
    "output": "What is offset by income from logistics services?"
  },
  {
    "input": "What are the components under Other operating expenses in the table?",
    "output": "What specific components are included in the category of \"Other operating expenses\" as mentioned in the table?"
  },
  {
    "input": "What are the components under Other operating expenses in the table?",
    "output": "What specifically are the individual components included in the category of Other operating expenses in the provided table?"
  },
  {
    "input": "What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?",
    "output": "\"What is the difference in losses resulting from the sale or disposal of fixed assets between the fiscal years 2017/2018 and 2018/2019?\""
  },
  {
    "input": "What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?",
    "output": "What was the difference in the amount of losses incurred from selling fixed assets between the fiscal years 2018/2019 and 2017/2018?"
  },
  {
    "input": "What was the percentage increase or decrease in losses resulting from the disposal of fixed assets during the period of 2018/2019 compared to the period of 2017/2018?",
    "output": "What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?"
  },
  {
    "input": "What was the percentage change in losses incurred from the disposal of fixed assets in the fiscal year 2018/2019 compared to the fiscal year 2017/2018?",
    "output": "What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?"
  },
  {
    "input": "What was the change in interest carry-forwards in FY2019 from FY2018?",
    "output": "What was the difference in the amount of interest carry-forwards between the fiscal years of 2019 and 2018?"
  },
  {
    "input": "What are the primary elements within the table that are utilized to determine the outcome of the interest calculation?",
    "output": "What are the broad components in the table which are used to calculate the interest result?"
  },
  {
    "input": "What are the various broad components included in the table that are utilized for calculating the outcome of interest?",
    "output": "What are the broad components in the table which are used to calculate the interest result?"
  },
  {
    "input": "What was the percentage change in the amount of income earned from interest during the period between 2018 and 2019 compared to the previous period between 2017 and 2018?",
    "output": "What was the percentage change in interest income in 2018/2019 from 2017/2018?"
  },
  {
    "input": "In which specific year did the total sum of taxes other than general administrative expenses exceed the amount incurred in those administrative expenses?",
    "output": "In which year was the amount of other taxes thereof from general administrative expenses larger?"
  },
  {
    "input": "In which years were the reserves retained from earnings calculated in?",
    "output": "During which specific years were the calculations for retained reserves from earnings conducted?"
  },
  {
    "input": "In which years were the reserves retained from earnings calculated in?",
    "output": "During which specific years were the calculations for retaining reserves from earnings carried out?"
  },
  {
    "input": "What was the change in the Income tax on components of other comprehensive income in FY2019 from FY2018?",
    "output": "What was the difference in the income tax related to the components of other comprehensive income from fiscal year 2018 to fiscal year 2019?"
  },
  {
    "input": "What was the change in operating income in the Communications Solutions segment in 2019?",
    "output": "What was the specific amount of increase or decrease in operating income for the Communications Solutions segment during the year 2019?"
  },
  {
    "input": "What was the change in operating income in the Communications Solutions segment in 2019?",
    "output": "What was the percentage change in operating income specifically for the Communications Solutions segment during the year 2019 compared to the previous year?"
  },
  {
    "input": "Why did operating income decrease in fiscal 2019?",
    "output": "\"What were the reasons for the decrease in operating income during fiscal year 2019?\""
  },
  {
    "input": "Why did operating income decrease in fiscal 2019?",
    "output": "What were the reasons for the decrease in operating income in fiscal 2019?"
  },
  {
    "input": "In which year did the volume or quantity of raw materials reach its highest point?",
    "output": "In which year was Raw materials larger?"
  },
  {
    "input": "In which specific year did the total volume or quantity of raw materials experience a greater increase or reach a higher value compared to previous years?",
    "output": "In which year was Raw materials larger?"
  },
  {
    "input": "In which specific years was the calculation of the Repurchase value conducted?",
    "output": "Which years was the Repurchase value calculated in?"
  },
  {
    "input": "What was the exact difference in the total number of common shares repurchased in the year 2019 compared to 2018?",
    "output": "What was the change in the Number of common shares repurchased in 2019 from 2018?"
  },
  {
    "input": "What was the calculation for determining the percentage change in the quantity of common shares repurchased during the year 2019 in comparison to 2018?",
    "output": "What was the percentage change in the Number of common shares repurchased in 2019 from 2018?"
  },
  {
    "input": "What are the specific categories or divisions for which the net sales figures are displayed in the provided table?",
    "output": "What are the segments for which the net sales are presented in the table?"
  },
  {
    "input": "What year did Industrial Solutions experience a larger size or growth compared to other years?",
    "output": "In which year was Industrial Solutions larger?"
  },
  {
    "input": "What specific changes occurred in the Industrial Solutions sector between 2018 and 2019?",
    "output": "What was the change in Industrial Solutions in 2019 from 2018?"
  },
  {
    "input": "In 2019, what specific changes occurred in the Industrial Solutions sector compared to the previous year of 2018?",
    "output": "What was the change in Industrial Solutions in 2019 from 2018?"
  },
  {
    "input": "In which years was the basic earnings per share calculated for?",
    "output": "For which specific years was the metric of basic earnings per share calculated and used for evaluation?"
  },
  {
    "input": "In which years was the basic earnings per share calculated for?",
    "output": "For which years was the calculation of basic earnings per share carried out? Please provide specific details in your answer."
  },
  {
    "input": "What was the change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?",
    "output": "What was the difference in the dilutive impact of share-based compensation arrangements between 2018 and 2019?"
  },
  {
    "input": "What was the year-on-year percentage change in the dilutive impact caused by share-based compensation arrangements in 2019 compared to 2018?",
    "output": "What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?"
  },
  {
    "input": "What was the exact percentage change in the dilutive impact of share-based compensation arrangements between 2018 and 2019?",
    "output": "What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?"
  },
  {
    "input": "In which specific year did the percentage of industrial solutions reach its lowest point?",
    "output": "In which year was the percentage of industrial solutions the lowest in?"
  },
  {
    "input": "What was the percentage change in the Weighted-Average Grant-Date Fair Value for nonvested shares between 2018 and 2019?",
    "output": "What was the change in the Weighted-Average Grant-Date Fair Value for nonvested shares in 2019 from 2018?"
  },
  {
    "input": "In the table, what segments were considered when calculating the backlog?",
    "output": "What were the segments for which backlog was calculated in the table?"
  },
  {
    "input": "For which segments in the table was the backlog calculated?",
    "output": "What were the segments for which backlog was calculated in the table?"
  },
  {
    "input": "What was the change in total backlog in 2019 from 2018?",
    "output": "What was the difference in the total backlog between 2019 and 2018?"
  },
  {
    "input": "What was the percentage difference in the total amount of unfinished work in 2019 compared to 2018?",
    "output": "What was the percentage change in total backlog in 2019 from 2018?"
  },
  {
    "input": "In which specific year did the amount spent on acquisition and integration costs exceed any other year?",
    "output": "In which year was Acquisition and integration costs larger?"
  },
  {
    "input": "In which specific year did the expenses incurred for acquisition and integration costs exceed the expenses in other years?",
    "output": "In which year was Acquisition and integration costs larger?"
  },
  {
    "input": "What are the components under U.S. in the table?",
    "output": "Which components are listed under the U.S. category in the table?"
  },
  {
    "input": "What are the different categories of Income Tax Expense (Benefit) mentioned in the table?",
    "output": "What are the types of Income Tax Expense (Benefit) in the table?"
  },
  {
    "input": "In which specific year did Non-U.S. experience the highest income tax expense (benefit)?",
    "output": "Which year was the current income tax expense (benefit) for Non-U.S. the largest?"
  },
  {
    "input": "In which year did the Non-U.S. current income tax expense (benefit) reach its highest amount?",
    "output": "Which year was the current income tax expense (benefit) for Non-U.S. the largest?"
  },
  {
    "input": "What was the change in Current income tax expense (benefit) in 2019 from 2018?",
    "output": "What was the difference in Current income tax expense (benefit) between 2019 and 2018?"
  },
  {
    "input": "What is the company's policy regarding the allocation and management of funds?",
    "output": "What is the company's funding policy?"
  },
  {
    "input": "What is the minimum amount of contributions that individuals are required to make to the pension plan outside of the United States?",
    "output": "How much minimum required contributions is expected to be made to the non-U.S. pension plan?"
  },
  {
    "input": "Which fiscal year between 2020 and 2024 will witness the highest benefit payments under the United States plans?",
    "output": "In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?"
  },
  {
    "input": "During which fiscal year from 2020 to 2024 will the benefit payments under the United States Plans reach their maximum extent?",
    "output": "In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?"
  },
  {
    "input": "What is typically not included in the net earnings of companies operating outside the United States?",
    "output": "What do the Non-U.S. net earnings exclude?"
  },
  {
    "input": "What is excluded from the net earnings of countries other than the U.S. and why?",
    "output": "What do the Non-U.S. net earnings exclude?"
  },
  {
    "input": "What was the percentage increase or decrease in the expense for U.S. federal income tax at the statutory rate in 2019 compared to 2018?",
    "output": "What was the percentage change in Notional U.S. federal income tax expense at the statutory rate in 2019 from 2018?"
  },
  {
    "input": "What are the specific elements or factors taken into account when calculating Restructuring and Other Charges, Net?",
    "output": "What are the components considered under Restructuring and Other Charges, Net?"
  },
  {
    "input": "When did the lowest value of net restructuring charges occur?",
    "output": "In which year was Restructuring charges, net the lowest?"
  },
  {
    "input": "When was the year with the lowest net restructuring charges recorded?",
    "output": "In which year was Restructuring charges, net the lowest?"
  },
  {
    "input": "What was the change in Restructuring charges, net in 2019 from 2018?",
    "output": "What was the difference in the amount of Restructuring charges, net between 2019 and 2018?"
  },
  {
    "input": "What was the change in Restructuring charges, net in 2019 from 2018?",
    "output": "What was the net change in restructuring charges in 2019 compared to 2018?"
  },
  {
    "input": "In 2019, what was the percentage difference in Restructuring charges, net compared to 2018?",
    "output": "What was the percentage change in Restructuring charges, net in 2019 from 2018?"
  },
  {
    "input": "What specific regions do the net sales to external customers correspond to?",
    "output": "What are the net sales by geographic region to external customers attributed to?"
  },
  {
    "input": "In which specific year did the percentage in Americas reach its highest point compared to other years?",
    "output": "In which year was the percentage in Americas the largest?"
  },
  {
    "input": "What is the specific year when the largest percentage was recorded in the Americas?",
    "output": "In which year was the percentage in Americas the largest?"
  },
  {
    "input": "What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?",
    "output": "What was the average proportion of net sales generated in the Asia-Pacific region, compared to the total net sales, for the years 2017, 2018, and 2019?"
  },
  {
    "input": "What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?",
    "output": "What was the average percentage of total net sales in the Asia-Pacific region for the years 2017, 2018, and 2019 combined?"
  },
  {
    "input": "What was the percentage change in the amount of US dollar-denominated long-term debt from 2018 to 2019?",
    "output": "What was the increase / (decrease) in US dollar-denominated long-term debt from 2018 to 2019?"
  },
  {
    "input": "What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?",
    "output": "What is the change in Numerator (basic) – Net income from 2018 to 2019?"
  },
  {
    "input": "What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?",
    "output": "What is the absolute change in Numerator (basic) – Net income from the year 2018 to the year 2019?"
  },
  {
    "input": "What is the percentage change in Basic Earnings per share from 2018 to 2019?",
    "output": "What is the increase/ (decrease) in Basic Earnings per share from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in Diluted Earnings per share from 2018 to 2019?",
    "output": "What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in Diluted Earnings per share (EPS) between 2018 and 2019?",
    "output": "What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?"
  },
  {
    "input": "What was the specific percentage of taxes paid in the year 2019, taking into account all applicable taxes and deductions?",
    "output": "What was the effective tax rate in 2019?"
  },
  {
    "input": "What was the average Computed income tax expense?",
    "output": "What was the average expense for income tax computed?"
  },
  {
    "input": "What was the percentage change in the amount of interest on borrowings from 2018 to 2019?",
    "output": "What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in Interest on borrowings from 2018 to 2019?",
    "output": "What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?"
  },
  {
    "input": "What was the increase / (decrease) in the Finance costs before interest on lease liabilities from 2018 to 2019?",
    "output": "What was the change in the Finance costs before interest on lease liabilities from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in Adjusted EBITDA between 2018 and 2019?",
    "output": "What was the increase / (decrease) in Adjusted EBITDA from 2018 to 2019?"
  },
  {
    "input": "What was the average Depreciation and amortization?",
    "output": "What was the average amount of depreciation and amortization expenses incurred over a specific period?"
  },
  {
    "input": "What factors contributed to the overall rise in both depreciation and amortization expenses?",
    "output": "What caused the increase in the total depreciation and amortization?"
  },
  {
    "input": "What was the Depreciation of right-of-use assets in 2019?",
    "output": "What was the amount of depreciation recorded for right-of-use assets during the year 2019?"
  },
  {
    "input": "What was the Depreciation of right-of-use assets in 2019?",
    "output": "\"What was the amount of depreciation for right-of-use assets recorded in the financial statements for the year 2019?\""
  },
  {
    "input": "What was the increase / (decrease) in the Depreciation from 2018 to 2019?",
    "output": "What was the net change in Depreciation, either an increase or decrease, between the years 2018 and 2019?"
  },
  {
    "input": "What was the increase / (decrease) in the Depreciation from 2018 to 2019?",
    "output": "What was the difference in the Depreciation amount from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the total depreciation and amortization from 2018 to 2019?",
    "output": "What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in Total depreciation and amortization from 2018 to 2019?",
    "output": "What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?"
  },
  {
    "input": "What was the change in the coupon rate between November 12, 2019, and April 30, 2019?",
    "output": "What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?"
  },
  {
    "input": "What was the change in the coupon rate from November 12, 2019 to April 30, 2019?",
    "output": "What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?"
  },
  {
    "input": "What was the percentage change in the Equivalent (Cdn$) value from 2018 to November 2019?",
    "output": "What was the increase / (decrease) in the Equivalent (Cdn$) between 2018 and November 2019?"
  },
  {
    "input": "What is the percentage change in the amount of interest on borrowings from 2018 to 2019?",
    "output": "What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?"
  },
  {
    "input": "By what percentage did the Interest on borrowings change from 2018 to 2019? Please state whether there was an increase or decrease.",
    "output": "What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?"
  },
  {
    "input": "What is the purpose of utilizing valuation-related analysis and making capital structure-related decisions?",
    "output": "What is used for to conduct valuation-related analysis and make capital structure-related decisions?"
  },
  {
    "input": "What items are considered when calculating the adjusted net debt?",
    "output": "What is included in the adjusted net debt?"
  },
  {
    "input": "In which context or industry is the valuation of debt derivatives without incorporating credit risk used as an evaluation tool?",
    "output": "Where is debt derivatives valued without adjustment for credit risk used for evaluation?"
  },
  {
    "input": "What is the net change in long-term debt from December 31, 2018, to December 31, 2019?",
    "output": "What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?"
  },
  {
    "input": "What is the change in the amount of Long-term debt from December 31, 2018 to December 31, 2019?",
    "output": "What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?"
  },
  {
    "input": "What is the change in the value of net debt derivative assets without credit risk adjustment from December 31, 2018 to December 31, 2019?",
    "output": "What is the increase/ (decrease) in Net debt derivative assets valued without any adjustment for credit risk from December 31, 2018 to December 31, 2019?"
  },
  {
    "input": "What is the method of measuring accounts receivable and what factors are considered in the measurement process?",
    "output": "How is accounts receivable measured?"
  },
  {
    "input": "What is the percentage change in the amount of customer accounts receivable from 2018 to 2019?",
    "output": "What is the increase/ (decrease) in Customer accounts receivable from 2018 to 2019?"
  },
  {
    "input": "\"What is the method employed to settle the principal amount of Convertible Notes in cash upon conversion, and how does it factor into the computation of diluted earnings?\"",
    "output": "Which method is used for to settle the principal of the Convertible Notes in cash on conversion and calculates diluted earnings?"
  },
  {
    "input": "What are the consequences when a loss is incurred that can be attributed to shareholders?",
    "output": "What happens if a loss attributable to shareholders has been incurred?"
  },
  {
    "input": "What was the Convertible Notes that had an anti-dilutive effect on the calculation of diluted earnings per common share in the year end 2019, 2018 and 2017 respectively?",
    "output": "What were the Convertible Notes that impacted diluted earnings per common share calculation in 2019, 2018, and 2017, specifically in terms of their anti-dilutive effects?"
  },
  {
    "input": "What is the change in Loss per common share - basic and diluted from 2018 to 2019?",
    "output": "What is the difference in the loss per common share - basic and diluted between 2018 and 2019?"
  },
  {
    "input": "What is the change in Loss per common share - basic and diluted from 2018 to 2019?",
    "output": "What is the difference in basic and diluted Loss per common share between 2018 and 2019?"
  },
  {
    "input": "How are the recognition processes for realized and unrealized losses different?",
    "output": "How are the realized and unrealized losses recognized?"
  },
  {
    "input": "How are both realized and unrealized losses recognized? Please provide a detailed explanation of the recognition process for each type of loss.",
    "output": "How are the realized and unrealized losses recognized?"
  },
  {
    "input": "In which specific year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands?",
    "output": "In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?"
  },
  {
    "input": "\"In which fiscal year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands (1 million)?\"",
    "output": "In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?"
  },
  {
    "input": "What were the total fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018, respectively?",
    "output": "What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?"
  },
  {
    "input": "What were the fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018?",
    "output": "What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?"
  },
  {
    "input": "What are the components or factors included in the income tax expense calculation?",
    "output": "What is included in the income tax expense?"
  },
  {
    "input": "What items are incorporated into the income tax expense?",
    "output": "What is included in the income tax expense?"
  },
  {
    "input": "What is the difference in the number of positions taken in previous years between December 31, 2019, and December 31, 2018?",
    "output": "What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What is the net change in the number of positions taken in prior years from December 31, 2019 to December 31, 2018?",
    "output": "What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What led to increase in Consolidated net cash flow from operating activities for the year ended December 31, 2019?",
    "output": "What factors contributed to the rise in Consolidated net cash flow from operating activities during the year ending on December 31, 2019?"
  },
  {
    "input": "How much did Brookfield receive from the sale of its interests in Altera in 2019?",
    "output": "How much was received from Brookfield for the sale of interests in Altera during 2019?"
  },
  {
    "input": "What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "What is the difference in the amount of cash generated or used for financing activities in the financial statements for the years ending December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "What is the difference in net cash flows from financing activities between the fiscal years ended on December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "What was the difference in the Net (loss) income between the fiscal years ending on December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "What is the difference in the amount of net (loss) income between the fiscal year ending on December 31, 2019, and the fiscal year ending on December 31, 2018?"
  },
  {
    "input": "What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "What is the difference in the amount of income tax expense or recovery reported for the fiscal year ended on December 31, 2019, compared to the fiscal year ended on December 31, 2018?"
  },
  {
    "input": "What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "How does the income tax expense (recovery) change between the years ended December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the sum of all minimum lease payments expected to be received in the year 2019?",
    "output": "What was the total Total minimum lease payments to be received in 2019？"
  },
  {
    "input": "What is the total sum of minimum lease payments that were expected to be received in the year 2019?",
    "output": "What was the total Total minimum lease payments to be received in 2019？"
  },
  {
    "input": "What is the change in Initial direct costs and other from December 31, 2019 to December 31, 2018?",
    "output": "How did the initial direct costs and other expenses change from December 31, 2018, to December 31, 2019?"
  },
  {
    "input": "When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?",
    "output": "\"When was the complete sale of Teekay's remaining holdings in Altera to Brookfield executed?\""
  },
  {
    "input": "When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?",
    "output": "\"When did Teekay sell its remaining interests in Altera to Brookfield?\""
  },
  {
    "input": "What were the Fair values at the start of the year for 2019 and 2018, respectively? Please provide the Fair value figures for both years separately.",
    "output": "What was the Fair value at the beginning of the year for 2019 and 2018 respectively?"
  },
  {
    "input": "In which year was Fair value at the beginning of the year less than 15,000 thousands?",
    "output": "In which year did the fair value at the beginning of the year fall below 15,000,000 (15 million) dollars?"
  },
  {
    "input": "\"What items are typically included in the calculation of current income tax expense, and what factors can affect this expense?\"",
    "output": "What is included in current income tax expense?"
  },
  {
    "input": "What components are typically encompassed in the calculation of contemporary income tax expense?",
    "output": "What is included in current income tax expense?"
  },
  {
    "input": "What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?",
    "output": "How much has the Current income tax expense changed from December 31, 2019, to December 31, 2018?"
  },
  {
    "input": "What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?",
    "output": "What was the difference in Current income tax expense between December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "\"Between two years, which one exhibits a higher expense for current income tax?\"",
    "output": "Which year has higher current income tax expense?"
  },
  {
    "input": "What were the Lease and Non-Lease commitments in the year 2021, and can you provide specific details about each commitment?",
    "output": "What was the Lease and Non-Lease commitment in 2021 respectively?"
  },
  {
    "input": "What was the amount of lease commitment for the fiscal year 2022?",
    "output": "What was the Lease commitment in 2022?"
  },
  {
    "input": "What was the total value of lease commitments in the year 2022?",
    "output": "What was the Lease commitment in 2022?"
  },
  {
    "input": "In which year was the Lease commitment less than 10,000 thousands?",
    "output": "In which specific year did the Lease commitment amount to less than 10,000 thousand units?"
  },
  {
    "input": "In which year was the Lease commitment less than 10,000 thousands?",
    "output": "In which specific year did the Lease commitment amount fall below 10,000 thousands? (For better answering, please state the year and the unit of the Lease commitment.)"
  },
  {
    "input": "What is the difference in the lease commitment amount between 2020 and 2021?",
    "output": "What is the change in the Lease commitment from 2020 to 2021?"
  },
  {
    "input": "What is the average duration of lease agreements from 2020 to 2022?",
    "output": "What is the average Lease Commitment from 2020 to 2022?"
  },
  {
    "input": "What is the average duration of lease commitments between 2020 and 2022?",
    "output": "What is the average Lease Commitment from 2020 to 2022?"
  },
  {
    "input": "How much was the equity loss for the year ended December 31, 2019?",
    "output": "What was the amount of equity lost during the period from January 1 to December 31, 2019?"
  },
  {
    "input": "How much was the equity loss for the year ended December 31, 2019?",
    "output": "What was the total amount of equity loss recorded for the period of the year ending December 31, 2019?"
  },
  {
    "input": "What is the difference in the combined amount of cash and restricted cash on December 31, 2019, compared to December 31, 2018?",
    "output": "What is the change in Cash and restricted cash from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What was the increase or decrease in the category of Other assets – current on the company's balance sheet from December 31, 2019 to December 31, 2018?",
    "output": "What is the change in Other assets – current from December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What were the values of the other current assets in the years 2019 and 2018?",
    "output": "What was the other assets - current in 2019 and 2018 respectively?"
  },
  {
    "input": "What were the respective amounts of non-current liabilities in 2019 and 2018 aside from the liabilities mentioned earlier?",
    "output": "What was the other liabilities - non current in 2019 and 2018 respectively?"
  },
  {
    "input": "What were the amounts of non-current liabilities in 2019 and 2018, respectively, excluding any other liabilities that may be present on the financial statements?",
    "output": "What was the other liabilities - non current in 2019 and 2018 respectively?"
  },
  {
    "input": "What is the difference in the number of Consolidated LNG carriers between 2020 and 2021?",
    "output": "What is the change in Consolidated LNG carriers from 2020 to 2021?"
  },
  {
    "input": "What is the net difference in the number of Consolidated LNG carriers between 2020 and 2021?",
    "output": "What is the change in Consolidated LNG carriers from 2020 to 2021?"
  },
  {
    "input": "What is the annual growth or decline in the number of Consolidated LNG carriers from 2021 to 2022?",
    "output": "What is the change in Consolidated LNG carriers from 2021 to 2022?"
  },
  {
    "input": "What is the average number of Consolidated LNG carriers for the period from 2020 to 2021?",
    "output": "What is the average Consolidated LNG carriers, for the year 2020 to 2021?"
  },
  {
    "input": "What is the average number of Consolidated LNG carriers between the years 2020 and 2021?",
    "output": "What is the average Consolidated LNG carriers, for the year 2020 to 2021?"
  },
  {
    "input": "What is the change in Realized (losses) gains relating to Interest rate swap agreements from Year Ended December 31, 2019 to December 31, 2018?",
    "output": "What was the difference in Realized (losses) gains associated with Interest rate swap agreements between December 31, 2019 and December 31, 2018?"
  },
  {
    "input": "What is the difference in Realized losses and gains from Forward freight agreements between Year Ended December 31, 2019 and December 31, 2018?",
    "output": "What is the change in Realized (losses) gains relating to Forward freight agreements from Year Ended December 31, 2019 to December 31, 2018?"
  },
  {
    "input": "What was the difference in revenue between 2019 and 2018?",
    "output": "What is the change in Revenues from, 2019 to 2018?"
  },
  {
    "input": "What is the change in Voyage expenses from, 2019 to 2018?",
    "output": "What is the difference in expenses for voyages comparing 2019 to 2018?"
  },
  {
    "input": "How is the expense of share-based compensation quantified and evaluated?",
    "output": "How is share-based compensation expense measured?"
  },
  {
    "input": "How is the expense related to share-based compensation quantified and determined?",
    "output": "How is share-based compensation expense measured?"
  },
  {
    "input": "What are the various types of share-based compensation plans in existence?",
    "output": "What are the types of share-based compensation plans?"
  },
  {
    "input": "By what percentage did the stock-based compensation expense for general and administrative operations decrease from 2018 to 2019?",
    "output": "How much did the stock-based compensation expense in the general and administrative operations decreased from 2018 to 2019?"
  },
  {
    "input": "What is the ending outstanding number of vested RSUs in 2019?",
    "output": "What is the final count of vested Restricted Stock Units (RSUs) at the conclusion of 2019?"
  },
  {
    "input": "What is the ending outstanding number of vested RSUs in 2019?",
    "output": "How many RSUs were left as outstanding in the year 2019 once they had vested?"
  },
  {
    "input": "What was the exact amount of depreciation expense recorded in the financial statements for the year 2018?",
    "output": "What was the depreciation expense in 2018?"
  },
  {
    "input": "What are the accepted accounting methods for handling and recording disposed assets?",
    "output": "What are the accounting treatments for disposed assets?"
  },
  {
    "input": "What are the accounting procedures to be followed specifically for assets that have been disposed of?",
    "output": "What are the accounting treatments for disposed assets?"
  },
  {
    "input": "What is the net total for property, plant and equipment in 2019?",
    "output": "What is the total value of property, plant, and equipment after deducting accumulated depreciation in 2019?"
  },
  {
    "input": "What is the net total for property, plant and equipment in 2019?",
    "output": "What is the total value of property, plant, and equipment after deducting accumulated depreciation and any impairment losses, as reported in the financial statements for the year 2019?"
  },
  {
    "input": "What is the percentage decrease in the net value of Property, Plant, and Equipment between 2018 and 2019?",
    "output": "What is the percentage decrease in Net Total Property, Plant and Equipment from 2018 to 2019?"
  },
  {
    "input": "How much was the increase in Federal R&D tax credits from 2018 to 2019?",
    "output": "How much did R&D tax credits from the Federal increased from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase in R&D tax credits provided by the Federal government from 2018 to 2019?",
    "output": "How much did R&D tax credits from the Federal increased from 2018 to 2019?"
  },
  {
    "input": "\"What was the amount recorded as a long-term liability in the financial statements for the year 2018?\"",
    "output": "How much was reflected as a long-term liability in 2018?"
  },
  {
    "input": "What is the percentage difference in Net deferred tax assets from 2018 to 2019?",
    "output": "What was the percentage change in Net deferred tax assets between 2018 and 2019?"
  },
  {
    "input": "What was the total cost of the service provided in the year 2018?",
    "output": "What was the service cost in 2018?"
  },
  {
    "input": "What was the cost of the service provided in the year 2018?",
    "output": "What was the service cost in 2018?"
  },
  {
    "input": "What was the difference in the cost of services from 2018 to 2019?",
    "output": "What was the change in service cost between 2018 and 2019?"
  },
  {
    "input": "What was the change in interest cost between 2018 and 2019?",
    "output": "What was the difference in the amount of money spent on interest between the years 2018 and 2019?"
  },
  {
    "input": "What was the change in interest cost between 2018 and 2019?",
    "output": "How did the interest cost change from 2018 to 2019, and what is the difference between the two years?"
  },
  {
    "input": "What was the change in balances at the end of period between 2018 and 2019?",
    "output": "What is the difference in balances at the end of the period between 2018 and 2019, and what caused this change?"
  },
  {
    "input": "What was the specific monetary value or total sum allocated for Machinery and equipment in the year 2018?",
    "output": "What was the amount for Machinery and equipment in 2018?"
  },
  {
    "input": "\"What was the specific monetary value allocated to Machinery and equipment in the year 2018?\"",
    "output": "What was the amount for Machinery and equipment in 2018?"
  },
  {
    "input": "What was the change in Net cash provided by (used in) operating activities from continuing operations between 2018 and 2019?",
    "output": "What was the difference in Net cash provided by (used in) operating activities from continuing operations between the years 2018 and 2019?"
  },
  {
    "input": "What was the difference in the amount of Net cash provided by (used in) operating activities between the years 2018 and 2019?",
    "output": "What was the change in Net cash provided by (used in) operating activities between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in Income (loss) from discontinued operations, net of income taxes between 2018 and 2019?",
    "output": "What was the percentage change in net income (loss) from discontinued operations, after accounting for income taxes, between the fiscal years 2018 and 2019?"
  },
  {
    "input": "Which periods does the company's effective tax rates differ from the U.S. federal statutory rate?",
    "output": "\"During which specific time periods does the effective tax rate of the company deviate from the U.S. federal statutory rate?\""
  },
  {
    "input": "What was the specific framework or arrangement regarding Federal taxes in the year 2018, including any applicable laws, regulations, rates, and deductions?",
    "output": "What was the current provision for Federal taxes in 2018?"
  },
  {
    "input": "What were the existing federal tax regulations and provisions for the year 2018?",
    "output": "What was the current provision for Federal taxes in 2018?"
  },
  {
    "input": "What was the specific provision made for foreign taxes in the year 2019?",
    "output": "What was the current provision for Foreign taxes in 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the fair value of intangible assets from 2018 to 2019?",
    "output": "What was the percentage change in the fair value of intangible assets between 2018 and 2019?"
  },
  {
    "input": "What was the change in Pro Forma total sales between 2018 and 2019?",
    "output": "What was the difference in the total sales of Pro Forma from 2018 to 2019?"
  },
  {
    "input": "What was the change in Pro Forma total sales between 2018 and 2019?",
    "output": "What was the difference in Pro Forma total sales from 2018 to 2019?"
  },
  {
    "input": "What was the exact fair value of the property and equipment owned by LumaSense?",
    "output": "What was the fair value of  Property and equipment from LumaSense?"
  },
  {
    "input": "What was the fair value of Property and equipment acquired by LumaSense?",
    "output": "What was the fair value of  Property and equipment from LumaSense?"
  },
  {
    "input": "What was the difference in the assumed exercise of dilutive stock options and restricted stock units between the years 2018 and 2019?",
    "output": "What was the change in Assumed exercise of dilutive stock options and restricted stock units between 2018 and 2019?"
  },
  {
    "input": "What was the disparity in the fair value of cash paid for acquisition between Trek and the Electrostatic Product Line?",
    "output": "What was the difference in the fair value of cash paid for acquisition between Trek and Electrostatic Product Line?"
  },
  {
    "input": "What was the diluted earnings per share (EPS) specifically for Continuing Operations for the quarter ending in September?",
    "output": "What was the diluted earnings per share of Continuing Operations in Quarter Ended  September?"
  },
  {
    "input": "What was the diluted earnings per share of Continuing Operations in the quarter ending in September?",
    "output": "What was the diluted earnings per share of Continuing Operations in Quarter Ended  September?"
  },
  {
    "input": "What was the total Income (loss) from discontinued operations, net of income taxes in 2019?",
    "output": "In 2019, what was the net income (loss) from discontinued operations, adjusted for income taxes?"
  },
  {
    "input": "What was the difference in total operating expense between the fiscal years 2018 and 2019?",
    "output": "What was the change in total operating expense between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the amount of money spent on operational expenses from 2018 to 2019?",
    "output": "What was the change in total operating expense between 2018 and 2019?"
  },
  {
    "input": "How much money was owed to the company for goods or services provided in 2019 but not yet invoiced?",
    "output": "What was the amount of unbilled receivables in 2019?"
  },
  {
    "input": "What were the precise amounts that were billed during the year 2018 after considering all deductions, fees, and payments?",
    "output": "What were the net amounts billed in 2018?"
  },
  {
    "input": "What was the change in unbilled receivables between 2018 and 2019?",
    "output": "What was the difference in the amount of unbilled receivables from 2018 to 2019?"
  },
  {
    "input": "What was the difference in the total amount billed between 2018 and 2019?",
    "output": "What was the change in net amounts billed between 2018 and 2019?"
  },
  {
    "input": "What was the accumulated amortization of customer relationships in 2018?",
    "output": "\"What was the total amount of accumulated amortization recorded for customer relationships during the year 2018?\""
  },
  {
    "input": "What were the top two segments with the highest net carrying amounts in the year 2018?",
    "output": "What was the two highest net carrying amounts segments in 2018?"
  },
  {
    "input": "What were the two segments with the highest net carrying amounts in 2018?",
    "output": "What was the two highest net carrying amounts segments in 2018?"
  },
  {
    "input": "How much did the service cost in 2019 and can you provide more details about the specific service being referred to?",
    "output": "What was service cost in 2019?"
  },
  {
    "input": "What was the total amount spent on interest expenses in the year 2018?",
    "output": "What was interest cost in 2018?"
  },
  {
    "input": "What was the expected rate of return on plan assets for the year 2017?",
    "output": "What was the Expected return on plan assets in 2017?"
  },
  {
    "input": "What was the anticipated return on assets for the plan in 2017?",
    "output": "What was the Expected return on plan assets in 2017?"
  },
  {
    "input": "What was the change in Amortization of actuarial gains and losses between 2017 and 2018?",
    "output": "What was the difference in the amount of amortization of actuarial gains and losses recorded between the years 2017 and 2018?"
  },
  {
    "input": "What was the change in Amortization of actuarial gains and losses between 2017 and 2018?",
    "output": "What was the difference in the amount of Amortization of actuarial gains and losses between the fiscal years 2017 and 2018?"
  },
  {
    "input": "What were the specific expenses incurred for severance payments and any associated costs during the year 2019?",
    "output": "What were the severance and related charges in 2019?"
  },
  {
    "input": "What was the amount of severance and associated expenses recorded as charges in the year 2019?",
    "output": "What were the severance and related charges in 2019?"
  },
  {
    "input": "What was the change in Severance and related charges between 2018 and 2019?",
    "output": "What was the year-on-year difference in the amount spent on Severance and related charges between 2018 and 2019?"
  },
  {
    "input": "What was the change in Severance and related charges between 2018 and 2019?",
    "output": "What was the difference in the amount of Severance and related charges incurred in 2018 compared to 2019?"
  },
  {
    "input": "What is the proportion of facility relocation and closure charges in the total amount of restructuring charges incurred in 2019?",
    "output": "What percentage of total restructuring charges in 2019 consisted of Facility relocation and closure charges?"
  },
  {
    "input": "What was the contrast between the combined assets in the Level 1 and Level 2 categories for the year 2019?",
    "output": "What was the difference in the total assets in the Level 1 Level 2 categories for 2019?"
  },
  {
    "input": "What was the difference between the total assets in the Level 1 and Level 2 categories for the year 2019?",
    "output": "What was the difference in the total assets in the Level 1 Level 2 categories for 2019?"
  },
  {
    "input": "What was the combined value of the top three assets in the Level 2 Category for the year 2019?",
    "output": "What was the total of the three highest assets in the Level 2 Category for 2019?"
  },
  {
    "input": "What percentage of total assets across all categories consisted of assets from the Level One category for 2019?",
    "output": "What was the proportion or share, in terms of percentage, of Level One assets out of the total assets across all categories for the year 2019?"
  },
  {
    "input": "What was the specific value or monetary figure of long-lived assets in the United States during the year 2019?",
    "output": "What was the amount of long-lived assets in United States in 2019?"
  },
  {
    "input": "What was the net difference in the value of long-term assets in the United States from 2018 to 2019?",
    "output": "What was the change in the amount of long-lived assets between 2018 and 2019 in United States?"
  },
  {
    "input": "\"What was the combined value of the two longest-lasting assets in the year 2018?\"",
    "output": "What is the sum of the highest two long-lived assets in 2018?"
  },
  {
    "input": "\"What is the total value, in monetary terms, of the two long-lived assets with the highest worth that were recorded in the year 2018?\"",
    "output": "What is the sum of the highest two long-lived assets in 2018?"
  },
  {
    "input": "What was the initial estimated value of property and equipment as of December 31, 2019 in relation to their market worth?",
    "output": "What was the Preliminary fair value of property and equipment in December 31, 2019?"
  },
  {
    "input": "What was the initial estimated value of property and equipment as of December 31, 2019, before any adjustments or final calculations were made?",
    "output": "What was the Preliminary fair value of property and equipment in December 31, 2019?"
  },
  {
    "input": "What was the initial estimated value assigned to inventories as of September 10th, 2019?",
    "output": "What was the Preliminary fair value of inventories in September 10, 2019?"
  },
  {
    "input": "\"What was the initial estimated market value of the inventories as of September 10, 2019?\"",
    "output": "What was the Preliminary fair value of inventories in September 10, 2019?"
  },
  {
    "input": "In 2019, what was the specific amount of revenue that was recognized for product sales and the provision of services at a particular point in time?",
    "output": "What was the Product and service revenue recognized at point in time in 2019?"
  },
  {
    "input": "What specific extended warranty and service contracts were acknowledged and recorded throughout the year of 2018?",
    "output": "What was the Extended warranty and service contracts recognized over time in 2018?"
  },
  {
    "input": "What were the specific extended warranty and service contracts that gained recognition throughout the year 2018?",
    "output": "What was the Extended warranty and service contracts recognized over time in 2018?"
  },
  {
    "input": "What was the change in Product and service revenue recognized at point in time between 2018 and 2019?",
    "output": "What was the specific difference in revenue from the sales of products and services recognized at a specific moment between the years 2018 and 2019?"
  },
  {
    "input": "What were the sales figures for the Industrial & Medical sectors in the year 2017?",
    "output": "What was the sales for Industrial & Medical in 2017?"
  },
  {
    "input": "What were the sales figures specifically for Industrial & Medical sectors in the year 2017?",
    "output": "What was the sales for Industrial & Medical in 2017?"
  },
  {
    "input": "What was the percentage change in sales of Semiconductor Equipment from 2017 to 2018?",
    "output": "What was the change in sales of Semiconductor Equipment between 2017 and 2018?"
  },
  {
    "input": "What were the income taxes for the 2018 tax year, based on the federal statutory tax rates?",
    "output": "What was the Income taxes per federal statutory rate in 2018?"
  },
  {
    "input": "What was the State income taxes, net of federal deduction in 2017?",
    "output": "In 2017, what was the amount of state income taxes paid after deducting the federal deduction?"
  },
  {
    "input": "What were the specific modifications that occurred in the GILTI Tax legislation from 2018 to 2019?",
    "output": "What was the change in GILTI Tax between 2018 and 2019?"
  },
  {
    "input": "What was the difference in the GILTI Tax rates from 2018 to 2019, and how did the change impact tax calculations?",
    "output": "What was the change in GILTI Tax between 2018 and 2019?"
  },
  {
    "input": "What was the change in Unremitted earnings between 2018 and 2019?",
    "output": "What was the difference in Unremitted earnings from 2018 to 2019? Please provide the specific amount of change in Unremitted earnings between these two years."
  },
  {
    "input": "What was the percentage difference in Withholding taxes from 2018 to 2019?",
    "output": "What was the percentage change in Withholding taxes between 2018 and 2019?"
  },
  {
    "input": "What is the estimated rate of forfeiture?",
    "output": "What was the forfeiture rate estimated to be?"
  },
  {
    "input": "What is total non-cash compensation expense related to stock-based awards in 2019?",
    "output": "\"What was the amount of expenses incurred in 2019 for stock-based awards as a form of total non-cash compensation?\""
  },
  {
    "input": "What is the range of years covered in the table?",
    "output": "What are the years included in the table?"
  },
  {
    "input": "\"What is the final balance as of December 31, 2019?\"",
    "output": "What is the balance at end of year for 2019?"
  },
  {
    "input": "In which specific markets does the company focus its marketing efforts and aim to capture a significant share of customers?",
    "output": "Which markets does the company target?"
  },
  {
    "input": "What specific types of research and development (R&D) activities are sponsored by companies? Please provide a comprehensive list of these activities as outlined in the table.",
    "output": "What are the types of company-sponsored R&D activities in the table?"
  },
  {
    "input": "What types of research and development (R&D) activities supported by companies are listed in the table?",
    "output": "What are the types of company-sponsored R&D activities in the table?"
  },
  {
    "input": "What was the difference in the total expenditure on research and development sponsored by the company from 2018 to 2019?",
    "output": "What is the change in the total company-sponsored research and development expense in 2019 from 2018?"
  },
  {
    "input": "What is the change in total backlog in 2019?",
    "output": "What was the overall increase or decrease in the total backlog during the year 2019?"
  },
  {
    "input": "What are the segments under Total Backlog in the table?",
    "output": "What specific categories or divisions are included within the Total Backlog section of the table?"
  },
  {
    "input": "How many distinct segments are there included in the Total Backlog?",
    "output": "How many segments are there under Total Backlog?"
  },
  {
    "input": "How many distinct segments are included within the category of Total Backlog?",
    "output": "How many segments are there under Total Backlog?"
  },
  {
    "input": "What is the specific maturity date or time period for the long-term debt that is expected to be due in 2021?",
    "output": "What is the maturity of long-term debt for 2021?"
  },
  {
    "input": "What are the components recorded under income (loss) from continuing operations before income taxes?",
    "output": "What specific items are included in the income (loss) from continuing operations category before taxes are deducted?"
  },
  {
    "input": "In which year was the amount under Foreign the smallest?",
    "output": "\"When was the lowest recorded value for the amount under the category 'Foreign' observed?\""
  },
  {
    "input": "What is the change in the amount under Foreign in 2019 from 2018?",
    "output": "What is the difference in the amount under the category \"Foreign\" between 2019 and 2018?"
  },
  {
    "input": "What is the change in the amount under Foreign in 2019 from 2018?",
    "output": "How much did the Foreign amount change from 2018 to 2019?"
  },
  {
    "input": "In which year was the amount of sales in Other the largest?",
    "output": "In which year did the category labeled as \"Other\" witness the highest recorded sales volume?"
  },
  {
    "input": "What is the percentage change in the amount of Other in 2019 from 2018?",
    "output": "What is the percentage difference in the amount of Other expenses or income in the year 2019 compared to 2018, and how can it be calculated?"
  },
  {
    "input": "What is the tax expense for 2019 calculated at the U.S. statutory rate?",
    "output": "What is the Tax expense at U.S. statutory rate for 2019?"
  },
  {
    "input": "What was the tax expense for the year 2019 in the United States at the statutory rate?",
    "output": "What is the Tax expense at U.S. statutory rate for 2019?"
  },
  {
    "input": "What is the percentage change in nondeductible expenses in 2019 from 2018?",
    "output": "What is the percentage difference in the amount of expenses that cannot be deducted in 2019 compared to 2018?"
  },
  {
    "input": "What is the percentage change in nondeductible expenses in 2019 from 2018?",
    "output": "What is the percentage increase or decrease in expenses that cannot be deducted from taxes in the year 2019 compared to the year 2018?"
  },
  {
    "input": "\"When were the costs associated with purchasing and developing the ERP system recorded?\"",
    "output": "In which years were the incurred costs related to the purchase and development of the ERP system recorded?"
  },
  {
    "input": "In which year did the total value of property, plant, and equipment reach its highest level?",
    "output": "In which year was the total amount of property, plant and equipment larger?"
  },
  {
    "input": "In which specific year did the combined value of property, plant, and equipment reach its highest point?",
    "output": "In which year was the total amount of property, plant and equipment larger?"
  },
  {
    "input": "What is the change in leasehold improvements from 2018 to 2019?",
    "output": "What was the year-on-year change in the value of leasehold improvements between 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in leasehold improvements from 2018 to 2019?",
    "output": "What is the percentage difference in the value of leasehold improvements from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in CGD sales in 2018?",
    "output": "What was the percentage increase or decrease in CGD sales in 2018 compared to the previous year?"
  },
  {
    "input": "What is the percentage change in CGD sales in 2018?",
    "output": "What is the exact percentage difference in sales of CGD products between the years 2018 and the previous year?"
  },
  {
    "input": "For which years is the amortization of purchased intangibles included in the CGD results recorded?",
    "output": "In the recorded CGD results, during which specific years is the amortization of purchased intangibles included?"
  },
  {
    "input": "In which year does the inclusion of the amortization of purchased intangibles result in a higher amount in the Comprehensive Gain or Loss From Discontinued Operations (CGD)?",
    "output": "In which year is the amortization of purchased intangibles included in the CGD results larger?"
  },
  {
    "input": "\"What criteria or requirements must be met in order to be eligible for receiving an extra cash payment amounting to $3.0 million?\"",
    "output": "What is the eligibility to receive an additional cash payment of $3.0 million based on?"
  },
  {
    "input": "During which years was the method of calculating net income (loss) per common share using the weighted-average number of shares outstanding implemented and documented?",
    "output": "For which years was the weighted-average number of shares outstanding used to compute net income (loss) per common share recorded?"
  },
  {
    "input": "What is the percentage change in adjusted EBITDA in 2019 from 2018?",
    "output": "What is the exact percentage change in adjusted EBITDA between the years 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in adjusted EBITDA in 2019 from 2018?",
    "output": "What is the percentage difference in adjusted EBITDA between 2019 and 2018? How can this information be used to analyze the financial performance in these two years?"
  },
  {
    "input": "What resulted in a higher operating income?",
    "output": "What factor led to an increase in operating income?"
  },
  {
    "input": "How many finished products were produced in the year 2019?",
    "output": "What is the amount of finished products in 2019?"
  },
  {
    "input": "What was the total quantity of completed products manufactured in the year 2019?",
    "output": "What is the amount of finished products in 2019?"
  },
  {
    "input": "In which year does the value of finished products surpass the value of finished products in other years, resulting in a higher overall value?",
    "output": "In which year is the value of finished products higher?"
  },
  {
    "input": "What are the differences in the types of materials and purchased parts between 2018 and 2019?",
    "output": "What is the change in materials and purchased parts from 2018 to 2019?"
  },
  {
    "input": "\"What factors contributed to the increase in operating profits?\"",
    "output": "What resulted in the improvement in operating profits?"
  },
  {
    "input": "For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?",
    "output": "In which fiscal year(s) is the CGD results recorded and does it include the amortization amount of purchased intangibles?"
  },
  {
    "input": "For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?",
    "output": "In which fiscal year(s) is the amortization amount of purchased intangibles included in the recorded results of the Comprehensive Growth and Development (CGD)?"
  },
  {
    "input": "Which year, from the given weighted-average assumptions for determining benefit obligation as of September 30, exhibits the highest rate of compensation increase among all years?",
    "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, which year has the largest rate of compensation increase?"
  },
  {
    "input": "What was the change in the discount rate between 2018 and 2019 for determining the benefit obligation at September 30?",
    "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, what is the change in the discount rate in 2019 from 2018?"
  },
  {
    "input": "What is the weighted-average discount rate used to calculate the benefit obligation as of September 30 for the years 2017, 2018, and 2019?",
    "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, what is the average discount rate across 2017, 2018 and 2019?"
  },
  {
    "input": "In what specific year did the rate differential for foreign exchange exceed 10.0?",
    "output": "In which year was Foreign rate differential greater than 10.0?"
  },
  {
    "input": "What was the average amount of state taxes, excluding federal benefits, for the years 2017 to 2019?",
    "output": "What was the average State taxes net of federal benefit for 2017-2019?"
  },
  {
    "input": "How much did the Research and Development credits shift between 2018 and 2019?",
    "output": "What is the change in the Research and development credits from 2018 to 2019?"
  },
  {
    "input": "What is the difference in the amount of Research and Development credits between 2018 and 2019?",
    "output": "What is the change in the Research and development credits from 2018 to 2019?"
  },
  {
    "input": "What were the respective gains in 2019 and 2018?",
    "output": "What were the gains achieved in 2019 and 2018, respectively?"
  },
  {
    "input": "What were the respective gains in 2019 and 2018?",
    "output": "What were the gains in 2019 and 2018, respectively?"
  },
  {
    "input": "What is the specific year when the Loss from discontinued operations was exactly zero?",
    "output": "In which year was Loss from discontinued operations 0?"
  },
  {
    "input": "What was the average amount of money lost from 2017 to 2019?",
    "output": "What was the average Net loss for 2017 to 2019?"
  },
  {
    "input": "What is the change in the Weighted average common shares outstanding-basic from 2018 to 2019?",
    "output": "What is the difference in the number of weighted average common shares outstanding-basic between 2018 and 2019?"
  },
  {
    "input": "What was the depreciation and amortization expense related to property and equipment for 2019?",
    "output": "What was the specific amount of depreciation and amortization expense attributed to property and equipment for the fiscal year 2019?"
  },
  {
    "input": "\"What was the precise monetary worth or estimated value, expressed in the currency of the specific country or region, of the category of Machinery and Equipment during the calendar year of 2019?\"",
    "output": "What was the value of Machinery and equipment in 2019?"
  },
  {
    "input": "What was the exact monetary value of Machinery and equipment in the year 2019?",
    "output": "What was the value of Machinery and equipment in 2019?"
  },
  {
    "input": "In what year did the construction in process have a value of less than 30,000 thousands?",
    "output": "In which year was value of Construction in process less than 30,000 thousands?"
  },
  {
    "input": "In which specific year did the value of Construction in process fall below 30,000 thousands?",
    "output": "In which year was value of Construction in process less than 30,000 thousands?"
  },
  {
    "input": "What were the average values of Machinery and equipment in both 2018 and 2019?",
    "output": "What was the average value of Machinery and equipment for 2018 and 2019?"
  },
  {
    "input": "What is the average telecommunication expenditure during the fiscal years 2018 and 2019?",
    "output": "What is the average Telecom for fiscal year 2018 and 2019?"
  },
  {
    "input": "During which year did the number of Telecom users fall below 200,000 thousand?",
    "output": "In which year was Telecom less than 200,000 thousand?"
  },
  {
    "input": "What was the exact percentage decrease of the revenue in 2019 compared to the previous year?",
    "output": "What was the decrease in the revenue in 2019?"
  },
  {
    "input": "What was the specific percentage decrease in revenue for the fiscal year of 2019?",
    "output": "What was the decrease in the revenue in 2019?"
  },
  {
    "input": "What is the average value of the Telecom industry for the fiscal years 2017 and 2018 combined?",
    "output": "What is the average Telecom value for fiscal year 2017 and 2018?"
  },
  {
    "input": "What was the average value of the telecommunications industry for the fiscal years 2017 and 2018?",
    "output": "What is the average Telecom value for fiscal year 2017 and 2018?"
  },
  {
    "input": "In which year was Telecom value less than 300,000 thousand?",
    "output": "In what specific year did the value of Telecom drop below 300,000 thousand?"
  },
  {
    "input": "What were the values of divestments and acquisitions in 2019, 2018, and 2017?",
    "output": "What was the (Divested)/acquired values in 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "In what specific year did the beginning balance fall below 5,000,000?",
    "output": "In which year was Balance — beginning of year less than 5,000 thousands?"
  },
  {
    "input": "In what specific year did the beginning balance equate to less than 5,000 thousands (5 million)?",
    "output": "In which year was Balance — beginning of year less than 5,000 thousands?"
  },
  {
    "input": "What was the difference in the amount of expenses allocated to provisions between the years 2017 and 2018?",
    "output": "What was the change in the Provisions/(expense) from 2017 to 2018?"
  },
  {
    "input": "What was the difference in the Provisions/(expense) between 2017 and 2018?",
    "output": "What was the change in the Provisions/(expense) from 2017 to 2018?"
  },
  {
    "input": "What was the amount of charge-offs recorded in the year 2019?",
    "output": "What was the charge-offs in 2019?"
  },
  {
    "input": "\"What was the total amount of charge-offs incurred in 2019?\"",
    "output": "What was the charge-offs in 2019?"
  },
  {
    "input": "\"What is the average net Provision for the years 2017, 2018, and 2019?\"",
    "output": "What was the average Provision, net for 2017-2019?"
  },
  {
    "input": "What was the average net provision for the years 2017, 2018, and 2019 combined?",
    "output": "What was the average Provision, net for 2017-2019?"
  },
  {
    "input": "What factors determine the value that is obtained upon vesting?",
    "output": "What is the value realized on vesting based on?"
  },
  {
    "input": "What were the specific equity awards held by each named executive officer throughout the year 2019?",
    "output": "What were the equity awards held by named executives during 2019?"
  },
  {
    "input": "What were the equity awards granted to the named executives in 2019?",
    "output": "What were the equity awards held by named executives during 2019?"
  },
  {
    "input": "What is the significance of the number of shares obtained upon vesting?",
    "output": "What does the number of shares acquired on vesting represent?"
  },
  {
    "input": "Where was the reported combined net loss of $82 million disclosed or published?",
    "output": "Where was the combined net loss of $82 million reported in?"
  },
  {
    "input": "What was the percentage difference in the Adjusted EBITDA margin from 2017 to 2019?",
    "output": "What is the total change in the Adjusted EBITDA margin between 2019 and 2017?"
  },
  {
    "input": "What specific expenses are included in the integration and transformation costs that have an impact on the Adjusted EBITDA?",
    "output": "What does the integration and transformation costs impacting Adjusted EBITDA include?"
  },
  {
    "input": "What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?",
    "output": "What was the difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?"
  },
  {
    "input": "What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?",
    "output": "How did the total special items affecting adjusted EBITDA change from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?",
    "output": "What is the percentage difference in the amount of special items affecting adjusted EBITDA from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?",
    "output": "What is the percentage difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?"
  },
  {
    "input": "How is the amortization process for deferred acquisition and fulfillment costs conducted?",
    "output": "How are the deferred acquisition and fulfillment costs amortized?"
  },
  {
    "input": "What is the percentage change for the end of period balance for Fulfillment Costs when comparing the beginning of period balance?",
    "output": "What is the percentage change in the end of period balance for Fulfillment Costs when comparing it with the beginning of period balance?"
  },
  {
    "input": "What was the total value of options that were available to be exercised and outstanding as of December 31, 2019?",
    "output": "What was the aggregate intrinsic value of options outstanding and exercisable at December 31, 2019? "
  },
  {
    "input": "Which year has a higher weighted-average exercise price for options outstanding and exercisable?",
    "output": "Which year exhibits a greater weighted-average exercise price for options that are currently outstanding and available for exercise?"
  },
  {
    "input": "What does the deferred income tax benefit (expense) under post-retirement benefit plans currently recognize? ",
    "output": "What recognition does the deferred income tax benefit (expense) currently provide for under post-retirement benefit plans?"
  },
  {
    "input": "What is the percentage increase or decrease in the deferred income tax benefit or expense for post-retirement benefit plans between 2017 and 2018?",
    "output": "What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?"
  },
  {
    "input": "What was the percentage difference in the deferred income tax benefit (expense) for post-retirement benefit plans between 2018 and 2017?",
    "output": "What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?"
  },
  {
    "input": "How is the Adjusted EBITDA Run Rate determined?",
    "output": "How is the Adjusted EBITDA Run Rate calculated and what factors are considered in determining it?"
  },
  {
    "input": "What are the different performance levels?",
    "output": "What are the various levels of performance and can you provide more details about each level?"
  },
  {
    "input": "How many levels of performance would result in a payout exceeding 50% of the target award?",
    "output": "How many performance levels would the payout as % of target award be above 50%?"
  },
  {
    "input": "How many performance levels have a payout as a percentage of the target award that exceeds 50%?",
    "output": "How many performance levels would the payout as % of target award be above 50%?"
  },
  {
    "input": "What is the main factor or aspect that goodwill is specifically connected or associated with?",
    "output": "What is goodwill attributable to?"
  },
  {
    "input": "What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?",
    "output": "What is the ratio of property, plant, and equipment to the total estimated aggregate consideration shown in the balance as of December 31, 2017?"
  },
  {
    "input": "What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?",
    "output": "What is the ratio of property, plant and equipment to the total estimated aggregate consideration on the balance sheet as of December 31, 2017?"
  },
  {
    "input": "What is the percentage change in other non current assets in 2018?",
    "output": "What was the percentage change in other non-current assets during the year 2018?"
  },
  {
    "input": "Under what conditions would officers be entitled to the years of welfare benefits?",
    "output": "When would officers have the right to receive welfare benefits for a prolonged period of time?"
  },
  {
    "input": "Which types of officers are listed in the table?",
    "output": "\"What types of officers are included in the table displayed?\""
  },
  {
    "input": "Which type of officer has the longest protected period?",
    "output": "Which rank of officer within a specific profession or organization is entitled to the longest period of protection?"
  },
  {
    "input": "Which type of officer has the longest protected period?",
    "output": "Which type of officer is granted the longest period of protection against any form of termination or dismissal?"
  },
  {
    "input": "What was the gross debt amount in the year 2019?",
    "output": "What is the gross debt in 2019?"
  },
  {
    "input": "What was the difference in the total amount of debt in 2019 compared to 2018?",
    "output": "What is the change in gross debt in 2019 from 2018?"
  },
  {
    "input": "What is the percentage change in gross debt in 2019 from 2018?",
    "output": "What was the percentage increase/decrease in the gross debt from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in gross debt in 2019 from 2018?",
    "output": "What is the percentage difference in the amount of gross debt from 2018 to 2019?"
  },
  {
    "input": "On what basis are the estimated benefit payments made, considering the underlying assumptions?",
    "output": "What assumptions are the estimated benefit payments based on?"
  },
  {
    "input": "How many separate periods are highlighted in the table?",
    "output": "How many different period segments are highlighted in the table?"
  },
  {
    "input": "How many distinct period segments are specifically emphasized in the provided table?",
    "output": "How many different period segments are highlighted in the table?"
  },
  {
    "input": "What is the percentage change in combined pension plan in 2021 from 2020?",
    "output": "What is the percentage difference in the combined pension plan between 2021 and 2020?"
  },
  {
    "input": "What was the extent or percentage of the business that falls under the category of Operations and Other that was sold?",
    "output": "What portion of business under Operations and Other was sold?"
  },
  {
    "input": "What is the total segment revenue in 2019?",
    "output": "What is the exact amount of revenue generated by each segment in 2019 and what is the sum of these revenues overall?"
  },
  {
    "input": "What components are under operating revenue?",
    "output": "What specific components contribute to the total operating revenue of a company or organization?"
  },
  {
    "input": "What additional information does Note 17 provide?",
    "output": "What is the additional information provided in Note 17?"
  },
  {
    "input": "What additional information does Note 17 provide?",
    "output": "What is the content of Note 17 and how does it provide additional information?"
  },
  {
    "input": "\"What was the exact sum of adjusted EBITDA for the entire year of 2019?\"",
    "output": "What is the total adjusted EBITDA in 2019?"
  },
  {
    "input": "What is the change in the adjusted EBITDA under Wholesale in 2019 from 2018?",
    "output": "What was the difference in adjusted EBITDA for the Wholesale category between 2019 and 2018?"
  },
  {
    "input": "What is the average total adjusted EBITDA over the three years?",
    "output": "What is the mean value of the total adjusted EBITDA for the three-year period?"
  },
  {
    "input": "How many executives among the present ones in the organization receive a short-term incentive bonus that exceeds $500,000 in amount?",
    "output": "How many current executives have a STI bonus amount greater than $500,000?"
  },
  {
    "input": "How many executives presently receive a Short-Term Incentive (STI) bonus exceeding $500,000?",
    "output": "How many current executives have a STI bonus amount greater than $500,000?"
  },
  {
    "input": "What is the salary earned by Indraneel Dev in 2019, and how does it compare to his/her STI bonus amount, expressed as a ratio?",
    "output": "What is Indraneel Dev's salary earned during 2019 expressed as a ratio of his/her STI bonus amount?"
  },
  {
    "input": "What is included in the category of 'Support assets'? Please provide a detailed explanation of the various components that constitute these assets.",
    "output": "What do 'Support assets' consist of?"
  },
  {
    "input": "What is the specific figure for Reported Balances' operating revenue?",
    "output": "What is the Reported Balances operating revenue?"
  },
  {
    "input": "What was the variation in the deferred income tax benefit (expense) related to pension plans between 2019 and 2018?",
    "output": "What is the change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?"
  },
  {
    "input": "What is the percentage change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?",
    "output": "What is the exact percentage difference in the deferred income tax benefit or expense for pension plans between the years 2018 and 2019?"
  },
  {
    "input": "What was the outcome of the continuous efforts made to downsize the company's workforce?",
    "output": "What was recognized as a result of ongoing efforts to reduce the company's workforce?"
  },
  {
    "input": "What is the average amount of service costs across 2017, 2018 and 2019?",
    "output": "What is the mean service cost from 2017 to 2019?"
  },
  {
    "input": "What was the total amount of money owed by customers that had not been received as of December 31, 2019?",
    "output": "What was the gross customer receivables at December 31, 2019?"
  },
  {
    "input": "What was the total amount of customer receivables as recorded on the balance sheet at the end of December 31, 2019, including all outstanding invoices and amounts owed to the company by its customers?",
    "output": "What was the gross customer receivables at December 31, 2019?"
  },
  {
    "input": "What is included in the total value of long-term debt?",
    "output": "What does total long-term debt include?"
  },
  {
    "input": "What is included in the total amount of long-term debt?",
    "output": "What does total long-term debt include?"
  },
  {
    "input": "What was the approach or method adopted in 2019 to determine the total assets of an entity?",
    "output": "Regarding the total assets, what was adopted in 2019?"
  },
  {
    "input": "How many different dates declared had a dividend per share of $0.250 in 2018 and 2019?",
    "output": "In 2018 and 2019, how many dates were declared with a dividend per share of $0.250?"
  },
  {
    "input": "\"What specific categories of expenses were documented in the records for the year 2018?\"",
    "output": "What types of expenses were recorded during 2018?"
  },
  {
    "input": "What were the recorded expenses in 2018 and what categories do they fall under?",
    "output": "What types of expenses were recorded during 2018?"
  },
  {
    "input": "\"What is the total amount of non-cash goodwill impairment charges that are not tax-deductible, for the years 2019 and 2018?\"",
    "output": "What is the sum of non-cash, non-tax-deductible goodwill impairment charges for 2019 and 2018?"
  },
  {
    "input": "How much were the cumulative acquisition-related expenses incurred specifically by Level 3 in the year 2019?",
    "output": "What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?"
  },
  {
    "input": "What was the total amount of expenses related to acquisitions that Level 3 incurred in the year 2019? Please provide the specific cumulative value.",
    "output": "What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?"
  },
  {
    "input": "In which year did the company record the smallest total expenditure associated with acquisitions?",
    "output": "Which year incurred the lowest amount of total acquisition-related expenses?"
  },
  {
    "input": "In which specific year did the company experience the least amount of expenses related to acquisition?",
    "output": "Which year incurred the lowest amount of total acquisition-related expenses?"
  },
  {
    "input": "Against what were the estimates offset for the years 2019, 2018, and 2017?",
    "output": "What are the 2019, 2018 and 2017 change in estimates offset against?"
  },
  {
    "input": "What factors were the changes in estimates for 2019, 2018, and 2017 set off against?",
    "output": "What are the 2019, 2018 and 2017 change in estimates offset against?"
  },
  {
    "input": "In what year was the highest accretion expense recorded?",
    "output": "Which year has the largest accretion expense?"
  },
  {
    "input": "What is the year with the highest accretion expense?",
    "output": "Which year has the largest accretion expense?"
  },
  {
    "input": "What is the average accretion expense across 2017, 2018 and 2019?",
    "output": "What is the average expense for accretion incurred during the years 2017, 2018, and 2019?"
  },
  {
    "input": "Where is the recording location for the current portion of the post-retirement benefit obligations?",
    "output": "Where is the current portion of the post-retirement benefit obligations recorded?"
  },
  {
    "input": "In which specific year is the fair value of plan assets under post-retirement benefit plans higher than in other years?",
    "output": "In which year is the fair value of plan assets under post-retirement benefit plans higher?"
  },
  {
    "input": "\"What were the depreciation and amortization figures for the fiscal year 2019?\"",
    "output": "What is the Depreciation and amortization for 2019?"
  },
  {
    "input": "What is the total adjusted EBITDA for each segment in 2019?",
    "output": "What is the total segment adjusted EBITDA in 2019?"
  },
  {
    "input": "What is the total amount of income tax expense (benefit) recorded in the financial statements for the years 2018 and 2019?",
    "output": "What is the sum of income tax expense (benefit) in 2018 and 2019?"
  },
  {
    "input": "What is the percentage change in income tax expense (benefit) in 2019 from 2018?",
    "output": "What is the percentage difference in income tax expense (benefit) between 2018 and 2019?"
  },
  {
    "input": "What was the total amount of interest expenses incurred in the year 2019?",
    "output": "What is the total interest expense in 2019?"
  },
  {
    "input": "What was the total amount of interest paid in expenses during the year 2019?",
    "output": "What is the total interest expense in 2019?"
  },
  {
    "input": "What segments of interest expense are presented in the table?",
    "output": "Which specific segments of interest expense are displayed in the table provided?"
  },
  {
    "input": "What is the specific monetary difference in the total interest expense for the year 2019 compared to 2018?",
    "output": "What is the change in the gross interest expense in 2019 from 2018?"
  },
  {
    "input": "What was the difference in the amount of money spent on interest expenses in 2019 compared to 2018?",
    "output": "What is the change in the gross interest expense in 2019 from 2018?"
  },
  {
    "input": "What is the average amount of interest paid over the years 2017, 2018, and 2019?",
    "output": "What is the average total interest expense for 2017 to 2019?"
  },
  {
    "input": "\"What is the average annual value of unvested restricted stock awards that were excluded from antidilutive considerations in the years 2017, 2018, and 2019?\"",
    "output": "What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?"
  },
  {
    "input": "What were the respective average annual amounts of unvested restricted stock awards that were excluded due to being antidilutive in 2017, 2018, and 2019?",
    "output": "What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?"
  },
  {
    "input": "Which year has a larger amount of contract acquisition costs?",
    "output": "In which specific year do the contract acquisition costs exhibit a greater magnitude or quantity compared to other years?"
  },
  {
    "input": "What is the change in contract assets in 2019?",
    "output": "What is the net increase or decrease in contract assets during the year 2019?"
  },
  {
    "input": "What percentage of Level 3 goodwill is specifically allocated to the consumer segment?",
    "output": "How much of Level 3 goodwill is allocated to consumer?"
  },
  {
    "input": "What is the ratio of goodwill to the total amount as of December 31, 2018, in the context of Business?",
    "output": "What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?"
  },
  {
    "input": "As of December 31, 2018, what is the ratio of goodwill to the total amount in Business?",
    "output": "What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?"
  },
  {
    "input": "What is the combined goodwill figure for the years 2017 and 2018?",
    "output": "What is the sum of the total amount of goodwill for 2017 and 2018?"
  },
  {
    "input": "What is the combined value of goodwill for both 2017 and 2018?",
    "output": "What is the sum of the total amount of goodwill for 2017 and 2018?"
  },
  {
    "input": "\"What specific items are encompassed within the current liabilities section of the consolidated balance sheets?\"",
    "output": "What do the current liabilities reflected in the consolidated balance sheets include?"
  },
  {
    "input": "What specific items are included under current liabilities in the consolidated balance sheets?",
    "output": "What do the current liabilities reflected in the consolidated balance sheets include?"
  },
  {
    "input": "What items are typically categorized as Other current liabilities in financial statements?",
    "output": "What are the items included under Other current liabilities?"
  },
  {
    "input": "In which specific year do capital expenditures account for a greater value of accounts payable?",
    "output": "Which year has a larger amount of accounts payable associated with capital expenditures?"
  },
  {
    "input": "Which specific year indicates a higher level of accounts payable specifically linked to capital expenditures?",
    "output": "Which year has a larger amount of accounts payable associated with capital expenditures?"
  },
  {
    "input": "What distinguishes the total grant values of Indraneel Dev and Stacey W. Goff?",
    "output": "What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?"
  },
  {
    "input": "What is the disparity in total grant amounts between Indraneel Dev and Stacey W. Goff?",
    "output": "What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?"
  },
  {
    "input": "What is the monthly average for the number of shares that are typically withheld for tax purposes?",
    "output": "What is the average number of shares withheld for taxes per month?"
  },
  {
    "input": "\"What is the monthly average amount of shares that are typically withheld for tax purposes?\"",
    "output": "What is the average number of shares withheld for taxes per month?"
  },
  {
    "input": "What is the total amount of minimum lease payments expected for non-cancellable operating leases in 2019?",
    "output": "How much is the 2019 total future minimum lease payments under non-cancellable operating leases?"
  },
  {
    "input": "What was the percentage change in future minimum lease payments due within one year between 2018 and 2019?",
    "output": "How much did future minimum lease payments due within one year change by between 2018 and 2019?"
  },
  {
    "input": "Which year, 2018 or 2019, had a higher value of capital?",
    "output": "Between 2018 and 2019, which year had a greater amount of capital?"
  },
  {
    "input": "In terms of owned assets, which year, 2018 or 2019, had a higher overall value or quantity of assets?",
    "output": "Between 2018 and 2019, which year had a greater amount of owned assets?"
  },
  {
    "input": "Which year, 2018 or 2019, had a higher total value of assets owned?",
    "output": "Between 2018 and 2019, which year had a greater amount of owned assets?"
  },
  {
    "input": "What were the average values of leased assets for the years 2018 and 2019?",
    "output": "What is the average leased assets for 2018 and 2019?"
  },
  {
    "input": "What was the average value of leased assets for the years 2018 and 2019?",
    "output": "What is the average leased assets for 2018 and 2019?"
  },
  {
    "input": "What is the average trade receivables included within non-current assets?",
    "output": "What is the average amount of trade receivables included within the category of non-current assets?"
  },
  {
    "input": "What is the average prepayments included in non-current assets?",
    "output": "What is the average amount of prepayments that are included in non-current assets?"
  },
  {
    "input": "What financial items does guidance basis comprise of?",
    "output": "What specific components does the guidance basis in financial terms consist of?"
  },
  {
    "input": "Which year, either 2018 or 2019, had a higher adjusted EBITDA on a guidance basis?",
    "output": "Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?"
  },
  {
    "input": "Which year, 2018 or 2019, had a higher adjusted EBITDA on a guidance basis? Please provide the comparison between the two specified years.",
    "output": "Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?"
  },
  {
    "input": "What is the 2019 average adjusted EBITDA, guidance basis?",
    "output": "\"What was the average adjusted EBITDA on a guidance basis for the year 2019?\""
  },
  {
    "input": "What is the average amount of cash and cash equivalents that are typically reported in the statement of cash flows?",
    "output": "What is the average cash and cash equivalents presented in the statement of cash flows?"
  },
  {
    "input": "What is the average amount of cash and cash equivalents reported in the statement of cash flows?",
    "output": "What is the average cash and cash equivalents presented in the statement of cash flows?"
  },
  {
    "input": "What are the different types of financial items that are included in the data of a consolidated income statement?",
    "output": "What financial items does the consolidated income statement data comprise of?"
  },
  {
    "input": "\"What is the mean total income for the fiscal years 2018 and 2019?\"",
    "output": "What is the average revenue for 2018 and 2019?"
  },
  {
    "input": "What were the average revenues for the years 2018 and 2019?",
    "output": "What is the average revenue for 2018 and 2019?"
  },
  {
    "input": "What was the average revenue in 2017 and 2018?",
    "output": "What is the average revenue for 2017 and 2018?"
  },
  {
    "input": "\"What was the average revenue in both 2017 and 2018? Please provide the specific revenue figures for each year.\"",
    "output": "What is the average revenue for 2017 and 2018?"
  },
  {
    "input": "\"What is the revenue change reported specifically for Europe?\"",
    "output": "What is the reported change in revenue - europe?"
  },
  {
    "input": "What is the reported change in revenue for Europe? How has the revenue in Europe been affected according to the reports?",
    "output": "What is the reported change in revenue - europe?"
  },
  {
    "input": "\"Which country, Germany or Italy, has experienced a greater overall growth in the organic sector over a specified period of time?\"",
    "output": "Between Germany and Italy, which one has a higher organic change?"
  },
  {
    "input": "What does cash consideration paid comprise of?",
    "output": "What is included in the cash consideration paid?"
  },
  {
    "input": "\"In the table, what is the timeframe for the financial years whose information is displayed?\"",
    "output": "Which financial years' information is shown in the table?"
  },
  {
    "input": "What is the total value of acquisitions made in 2019 throughout the year?",
    "output": "How much is the 2019 acquisitions during the year ?"
  },
  {
    "input": "What was the total value of acquisitions made in 2019 throughout the year?",
    "output": "How much is the 2019 acquisitions during the year ?"
  },
  {
    "input": "Which year, 2018 or 2019, witnessed a higher number of acquisitions throughout the entire year?",
    "output": "Between 2018 and 2019, which year had a greater amount of acquisitions during the year?"
  },
  {
    "input": "What are the total operating lease commitments?",
    "output": "What is the sum of all the commitments related to operating leases?"
  },
  {
    "input": "What are the total operating lease commitments?",
    "output": "What is the sum of all commitments made for operating leases?"
  },
  {
    "input": "What is the total outstanding amount of financial liabilities that need to be paid within the shortest timeframe?",
    "output": "How much financial liabilities are due for payment soonest?"
  },
  {
    "input": "What is the proportion of financial liabilities in relation to the overall sum of contractual obligations and commitments?",
    "output": "What percentage of total contractual obligations and commitments is the financial liabilities?"
  },
  {
    "input": "\"What is the average amount of revenue generated from services according to IAS 18 for the years 2018 and 2019?\"",
    "output": "What is the average service revenue between 2018 and 2019 IAS 18?"
  },
  {
    "input": "What is the difference between average service revenue and average other revenue?",
    "output": "What distinguishes average service revenue from average other revenue?"
  },
  {
    "input": "Which specific financial years are being represented in the provided table?",
    "output": "Which financial years' information is shown in the table?"
  },
  {
    "input": "What type of short-term investments are shown in the table?",
    "output": "\"What specific types of short-term investments are displayed in the provided table?\""
  },
  {
    "input": "What is the percentage change in the value of managed investment funds from 2018 to 2019?",
    "output": "What is the change in managed investment funds between 2018 and 2019?"
  },
  {
    "input": "What is the difference in the value of managed investment funds from 2018 to 2019?",
    "output": "What is the change in managed investment funds between 2018 and 2019?"
  },
  {
    "input": "What type of long-term borrowings are shown in the table?",
    "output": "What are the specific types of long-term borrowings that are presented in the table?"
  },
  {
    "input": "What type of long-term borrowings are shown in the table?",
    "output": "What is the specific category of long-term borrowings that is displayed in the table?"
  },
  {
    "input": "What is the percentage change in the number of shares acquired from February to March 2019?",
    "output": "What is the percentage change between shares purchased in February and March 2019?"
  },
  {
    "input": "Which financial years' information is shown in the table?",
    "output": "Which fiscal years' data is displayed in the given table?"
  },
  {
    "input": "Which financial years' information is shown in the table?",
    "output": "Which specific financial year's information is depicted in the table?"
  },
  {
    "input": "How much is the 2019 audit fees ?",
    "output": "What is the exact cost of the audit fees for the year 2019?"
  },
  {
    "input": "What were the average fees for audits in 2018 and 2019?",
    "output": "What is the average audit fees for 2018 and 2019?"
  },
  {
    "input": "What were the average audit fees in both 2018 and 2019?",
    "output": "What is the average audit fees for 2018 and 2019?"
  },
  {
    "input": "\"What is the average total fees for the years 2018 and 2019 combined?\"",
    "output": "What is the average total fees for 2018 and 2019?"
  },
  {
    "input": "What is the average sum of fees for the years 2018 and 2019 combined?",
    "output": "What is the average total fees for 2018 and 2019?"
  },
  {
    "input": "What is the discrepancy between the average fees charged for audits and the average total fees incurred in 2018 and 2019?",
    "output": "What is the difference between the average audit fees and the average total fees for 2018 and 2019?"
  },
  {
    "input": "What is the disparity between the mean charges for audits and the mean overall charges, comparing the years 2018 and 2019?",
    "output": "What is the difference between the average audit fees and the average total fees for 2018 and 2019?"
  },
  {
    "input": "What is the amount of the non-current liabilities' 2018 other payables?",
    "output": "How much is the 2018 other payables included within non-current liabilities?"
  },
  {
    "input": "What is the value of the 2019 other payables included in current liabilities, excluding the amount payable for the share buyback?",
    "output": "How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?"
  },
  {
    "input": "What is the value of the 2019 other payables included in the current liabilities, except for the amount payable related to the share buyback?",
    "output": "How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?"
  },
  {
    "input": "What is the total amount of money that is owed to subsidiaries within one year as of 2019?",
    "output": "What is the 2019 amounts owed to subsidiaries within one year?"
  },
  {
    "input": "Which year experiences a greater number of bank loans and other loans that are due within one year?",
    "output": "Which year has a higher amount of bank loans and other loans under amounts falling due within one year?"
  },
  {
    "input": "How much is 2019 cash at bank and in hand ?",
    "output": "What is the total amount of cash that was available in the bank and on hand in 2019?"
  },
  {
    "input": "Between 2018 and 2019, which year had a greater amount of money market funds?",
    "output": "Which year, 2018 or 2019, had a higher value of money market funds?"
  },
  {
    "input": "Between 2018 and 2019, which year had a greater amount of money market funds?",
    "output": "Which year had a higher volume of funds invested in money market funds: 2018 or 2019?"
  },
  {
    "input": "Which year, 2018 or 2019, witnessed a higher quantity of government securities being issued?",
    "output": "Between 2018 and 2019, which year had a greater amount of government securities?"
  },
  {
    "input": "Between 2018 and 2019, which year had more ordinary shares allotted?",
    "output": "In the period spanning from 2018 to 2019, which year witnessed a higher allocation of ordinary shares?"
  },
  {
    "input": "What is the current cost of servicing for the year 2019?",
    "output": "How much is the 2019 current service cost?"
  },
  {
    "input": "What specific types of investments that are no longer held are displayed in the table?",
    "output": "What type of non-current investments are shown in the table?"
  },
  {
    "input": "In the table, which types of investments that are not currently being utilized or held are depicted?",
    "output": "What type of non-current investments are shown in the table?"
  },
  {
    "input": "How much is the 2018 debt securities?",
    "output": "What is the value or price of the debt securities issued in 2018?"
  },
  {
    "input": "How much is the 2018 debt securities?",
    "output": "What is the cost of the 2018 debt securities?"
  },
  {
    "input": "Which financial years' information is shown in the table?",
    "output": "Which financial years are displayed in the provided table?"
  },
  {
    "input": "What is the total projected income and expenditure for the year 2018?",
    "output": "How much is the 2018 aggregated expected income expense ?"
  },
  {
    "input": "What is the projected total income expense for 2018 after aggregating all relevant information?",
    "output": "How much is the 2018 aggregated expected income expense ?"
  },
  {
    "input": "What is the 2018 deferred tax on overseas earnings, excluding the 15€m charge relating to the combination of Vodafone India with Idea Cellular?",
    "output": "What is the amount of deferred tax on overseas earnings in 2018, excluding the charge of 15€m related to the merger of Vodafone India and Idea Cellular?"
  },
  {
    "input": "What is the difference in average income tax expense between the years 2017-2018 and 2018-2019?",
    "output": "What is the change between 2017-2018 and 2018-2019 average income tax expense?"
  },
  {
    "input": "How much is the excluded depreciation and amortisation for 2019 adjusted EBITDA?",
    "output": "What is the amount of depreciation and amortization that is excluded from the adjusted EBITDA calculation for the year 2019?"
  },
  {
    "input": "How much is the excluded depreciation and amortisation for 2018 adjusted EBITDA?",
    "output": "What is the total amount of depreciation and amortization that were not included in calculating the adjusted EBITDA for the year 2018?"
  },
  {
    "input": "\"What is the exact amount of the adjusted profit before tax for the year 2018?\"",
    "output": "How much is the adjusted profit before tax in 2018?"
  },
  {
    "input": "Which specific financial year's information is displayed in the provided table?",
    "output": "Which financial years' information is shown in the table?"
  },
  {
    "input": "What is the distinction between the average net book value and the average total costs of shares in Group undertakings as of March 31, 2019? How can analyzing this difference assist in better understanding financial performance?",
    "output": "What is the difference between 2019 average net book value and 2019 average total costs of shares in Group undertakings as at 31 March?"
  },
  {
    "input": "What is the significance of the recorded amount for developed technology?",
    "output": "What does the amount recorded for developed technology represent?"
  },
  {
    "input": "What does the recorded amount for developed technology signify, and what is its purpose or significance?",
    "output": "What does the amount recorded for developed technology represent?"
  },
  {
    "input": "What is the useful life for Developed technology?",
    "output": "What is the average duration of usefulness or lifespan for technology that has been fully developed and is in practical use?"
  },
  {
    "input": "What is the difference in useful life between developed technology and customer relationships?",
    "output": "What distinguishes the lifespan of developed technology from that of customer relationships?"
  },
  {
    "input": "What is the difference in fair value between developed technology and customer relationships?",
    "output": "What distinguishes the fair value of developed technology from that of customer relationships?"
  },
  {
    "input": "What identifiable intangible assets have a useful life exceeding five years?",
    "output": "What are the identifiable intangible assets with a useful life above 5 years?"
  },
  {
    "input": "What intangible assets can be identified and have a useful life of more than 5 years?",
    "output": "What are the identifiable intangible assets with a useful life above 5 years?"
  },
  {
    "input": "What does the amount recorded for developed technology represent?",
    "output": "\"What is the meaning or significance of the recorded amount for developed technology?\""
  },
  {
    "input": "What is the useful life (in years) of developed technology?",
    "output": "What is the average duration, measured in years, of the lifespan of technology that has been developed?"
  },
  {
    "input": "What is the difference in fair value between developed technology and customer relationships?",
    "output": "What distinguishes the fair value of developed technology from that of customer relationships, and how do they differ from each other?"
  },
  {
    "input": "What are the intangible assets that have a fair value of above $20,000 thousands?",
    "output": "What are the intangible assets with a fair value exceeding $20,000 thousands? Please provide a comprehensive list of these assets."
  },
  {
    "input": "What are the intangible assets that have a fair value of above $20,000 thousands?",
    "output": "What are the intangible assets with a fair value exceeding $20 million dollars?"
  },
  {
    "input": "What is the distinction between the total amount invested in debt and equity, when considering amortized cost as the basis for evaluation?",
    "output": "What is the difference in the total debt investments and the total equity investments, based on amortized cost?"
  },
  {
    "input": "What is the percentage composition of U.S. treasury securities in relation to the total debt investments when considering fair value?",
    "output": "Based on fair value, what is the percentage constitution of U.S. treasury securities among the total debt investments?"
  },
  {
    "input": "What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? ",
    "output": "What was the excluded number of potentially anti-dilutive shares for the fiscal years ending June 30, 2017, 2018, and 2019?"
  },
  {
    "input": "What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? ",
    "output": "What is the total count of potentially anti-dilutive shares excluded for the fiscal years ending on June 30, 2017, 2018, and 2019, respectively?"
  },
  {
    "input": "What is the variance in the weighted-average ordinary shares outstanding for basic and diluted calculations in the fiscal year that ended on June 30, 2019?",
    "output": "In fiscal year ended June 30, 2019, what is the difference in the weighted-average ordinary shares outstanding between the basic and the diluted?"
  },
  {
    "input": "What is the average total non-current operating assets for fiscal years ended June 30, 2018 and 2019?",
    "output": "\"What is the average total value of non-current operating assets for the fiscal years that ended on June 30, 2018 and June 30, 2019?\""
  },
  {
    "input": "How many geographic regions had non-current operating assets exceeding $10,000 thousand in the fiscal year ending on June 30, 2019?",
    "output": "In fiscal year ended June 30, 2019, how many geographic regions have non-current operating assets of more than $10,000 thousand?"
  },
  {
    "input": "What percentage of the total non-current operating assets owned by the Group in the United States constituted the non-current operating assets in the fiscal year ending June 30, 2018?",
    "output": "In fiscal year ended June 30, 2018, what is the percentage constitution of the non-current operating assets in the United States among the total non-current operating assets owned by the Group?"
  },
  {
    "input": "\"What specific adjustments were made to figures in the year 2018?\"",
    "output": "What was the adjustment done to figures in 2018?"
  },
  {
    "input": "How much did the value of security deposits change from fiscal year 2018 to 2019?",
    "output": "What is the change in value between security deposits between fiscal years 2018 and 2019?"
  },
  {
    "input": "What is the  Balance at the beginning of 2019?",
    "output": "What is the balance amount in the accounts at the start of the year 2019?"
  },
  {
    "input": "What is the average deferred tax expense for fiscal years 2018 and 2019?",
    "output": "What is the average amount of deferred tax expense incurred during fiscal years 2018 and 2019?"
  },
  {
    "input": "What is the percentage change of deferred tax expenses between fiscal year 2018 to 2019?",
    "output": "What is the percentage difference in deferred tax expenses between fiscal year 2018 and fiscal year 2019?"
  },
  {
    "input": "What is the amount recorded for short-term investments as of June 30, 2018, on the consolidated statements of financial position for the Group?",
    "output": "As of June 30, 2018, what is the value of short-term investments on the Group's consolidated statements of financial position?"
  },
  {
    "input": "Based on fair value, what is the difference in value between U.S. treasury securities and agency securities?",
    "output": "What is the quantifiable difference between U.S. treasury securities and agency securities in terms of their fair market value?"
  },
  {
    "input": "What are the specific revenue figures for the United Kingdom for the fiscal years that concluded in 2017, 2018, and 2019?",
    "output": "What are the revenue amounts from the United Kingdom for fiscal years ended 2017, 2018 and 2019 respectively?"
  },
  {
    "input": "What is the difference in the revenues from Asia Pacific between fiscal years ended 2018 and 2019?",
    "output": "How does the revenue for the Asia Pacific region differ between the fiscal years ending in 2018 and 2019?"
  },
  {
    "input": "What is the balance of capped call as of June 30, 2019?",
    "output": "What is the specific capped call balance as of June 30, 2019?"
  },
  {
    "input": "What is the difference in the capped call balance between the fiscal year ending on June 30, 2018, and the fiscal year ending on June 30, 2019?",
    "output": "What is the change in the balance of capped call between fiscal year ended June 30, 2018 and 2019?"
  },
  {
    "input": "What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?",
    "output": "What is the difference in the balance of capped call and non-marketable investments as of June 30, 2019, and how do these two types of investments vary in terms of their marketability?"
  },
  {
    "input": "What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?",
    "output": "What is the balance difference between capped call and non-marketable investments as of June 30, 2019?"
  },
  {
    "input": "\"What is the chronological order of the share capital amounts from June 30, 2015 to 2019?\"",
    "output": "What are the share capital for the years as of June 30, 2015 to 2019 in chronological order?"
  },
  {
    "input": "List the short-term investments from June 30, 2015 to 2019 in chronological order.",
    "output": "What are the short-term investments for the years as of June 30, 2015 to 2019 in chronological order?"
  },
  {
    "input": "What is the difference in the value of short-term investments between fiscal years 2018 and 2019?",
    "output": "What is the variance in the monetary worth of short-term investments from fiscal year 2018 to fiscal year 2019?"
  },
  {
    "input": "What is the average  Cash and cash equivalents for 2015-2019?",
    "output": "What is the average amount of Cash and cash equivalents from 2015 to 2019?"
  },
  {
    "input": "What is the definition of other fees?",
    "output": "What does the term \"other fees\" mean? Please provide a clear definition of this term, including any relevant details, to facilitate a more accurate response."
  },
  {
    "input": "What is the average amount of taxes paid in fees for the fiscal years 2018 and 2019?",
    "output": "What is the average tax fees for fiscal years 2018 and 2019?"
  },
  {
    "input": "What is the disparity in the aggregate of obligations between operating leases and other types of contractual commitments?",
    "output": "What is the difference in the total commitments between that of operating leases and other contractual commitments?"
  },
  {
    "input": "What percentage of the total commitments for operating leases for the year ending in 2020 is constituted by the commitments specifically for operating leases?",
    "output": "What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?"
  },
  {
    "input": "What portion of the total commitments for operating leases for the year ending 2020 represents the percentage constitution?",
    "output": "What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?"
  },
  {
    "input": "What was the percentage increase in maintenance revenues from the fiscal year ending in 2018 to the fiscal year ending in 2019?",
    "output": "What was the increase of maintenance revenues from fiscal year ended 2018 to 2019?"
  },
  {
    "input": "What was the exact percentage increase in perpetual license revenues from the fiscal year ended 2018 to the fiscal year ended 2019?",
    "output": "What was the increase of perpetual license revenues from fiscal year ended 2018 to 2019?"
  },
  {
    "input": "What percentage of total revenue for the fiscal year ending in 2018 is made up of perpetual license sales?",
    "output": "For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?"
  },
  {
    "input": "What percentage of the total revenue for the fiscal year ended 2018 was generated from perpetual licenses?",
    "output": "For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?"
  },
  {
    "input": "\"What are the average maintenance revenues for the fiscal years ending in 2018 and 2019, respectively?\"",
    "output": "What is the average maintenance revenues for fiscal year ended 2018 and 2019?"
  },
  {
    "input": "What is the specific provision for dilapidation expenses allocated for the fiscal year 2019?",
    "output": "What is the dilapidation provision for fiscal year 2019?"
  },
  {
    "input": "What is the formula to calculate free cash flow, and can you further explain the steps involved in deriving this measure?",
    "output": "How is free cash flow calculated?"
  },
  {
    "input": "What was the percentage increase in free cash flow from fiscal year 2018 to 2019?",
    "output": "What was the increase in free cash flow between fiscal year 2018 and 2019?"
  },
  {
    "input": "What is the average net cash flow generated by operating activities for the years 2017, 2018, and 2019?",
    "output": "What is the average net cash provided by operating activities from 2017-2019?"
  },
  {
    "input": "\"What is the average amount of net cash generated from operating activities for the years 2017, 2018, and 2019?\"",
    "output": "What is the average net cash provided by operating activities from 2017-2019?"
  },
  {
    "input": "What is the percentage difference in the amount of available cash generated by the company between the years 2017 and 2018?",
    "output": "What is the percentage change in free cash flow between 2017 and 2018?"
  },
  {
    "input": "What is the change in capital expenditures between 2018 and 2019?",
    "output": "What was the difference in capital expenditures from 2018 to 2019?"
  },
  {
    "input": "What is the change in capital expenditures between 2018 and 2019?",
    "output": "What was the difference in the amount of money spent on capital expenditures from 2018 to 2019?"
  },
  {
    "input": "As of June 30, 2019, what is the total value or monetary amount of outstanding payments owed by customers or business partners for goods or services provided?",
    "output": "As of June 30, 2019, what is the amount of trade receivables?"
  },
  {
    "input": "How does the net cash provided by operating activities compare between the fiscal years ending in 2018 and 2019?",
    "output": "What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?"
  },
  {
    "input": "What is the change in net cash provided by operating activities between the fiscal years ending in 2018 and 2019?",
    "output": "What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?"
  },
  {
    "input": "What are the specific components included in the \"other current assets\" category in the table?",
    "output": "What are the components under other current assets in the table?"
  },
  {
    "input": "What was the difference in the total value of other current assets between 2019 and 2018?",
    "output": "What was the change in total other current assets in 2019 from 2018?"
  },
  {
    "input": "Which items in the table include the cumulative effect of the 2017 Tax Reform Act in their 2017 amounts?",
    "output": "For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?"
  },
  {
    "input": "Which items in the table include the sum related to the impact of the 2017 Tax Reform Act in their amount for 2017?",
    "output": "For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?"
  },
  {
    "input": "What was the year when the Diluted Net Income per Common Share reached its highest value?",
    "output": "In which year was the Diluted Net Income per Common Share largest?"
  },
  {
    "input": "In which year did the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reach its highest magnitude?",
    "output": "In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?"
  },
  {
    "input": "What was the year when the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reached its highest value?",
    "output": "In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?"
  },
  {
    "input": "Where is Income tax benefit included?",
    "output": "Where is the income tax benefit typically included in financial statements or accounting records?"
  },
  {
    "input": "Where is Income tax benefit included?",
    "output": "Where can income tax benefit be found in financial statements or reports?"
  },
  {
    "input": "When is the calculation of income tax benefit typically performed annually?",
    "output": "In which years is income tax benefit calculated?"
  },
  {
    "input": "What does restricted cash include?",
    "output": "What is included in restricted cash and can you provide further details about its components?"
  },
  {
    "input": "In which year was Restricted cash included in \"Other current assets\" the lowest?",
    "output": "In which year did the inclusion of Restricted cash in the category of \"Other current assets\" reach its lowest level?"
  },
  {
    "input": "What was the difference in the amount of Restricted cash included in the category of \"Deferred charges and other assets\" in the year 2017 compared to 2016?",
    "output": "What was the change in Restricted cash included in \"Deferred charges and other assets\" in 2017 from 2016?"
  },
  {
    "input": "What is the largest recorded year for Net periodic benefit cost?",
    "output": "In which year was Net periodic benefit cost the largest?"
  },
  {
    "input": "What was the difference in the amount of money spent on interest in 2019 compared to 2018?",
    "output": "What was the change in interest cost in 2019 from 2018?"
  },
  {
    "input": "What was the difference in the cost of interest between 2019 and 2018?",
    "output": "What was the change in interest cost in 2019 from 2018?"
  },
  {
    "input": "\"In which year did the amount of deferred revenue reach its highest value and why?\"",
    "output": "In which year was deferred revenue larger?"
  },
  {
    "input": "During which year was the accumulated income received in advance, known as deferred revenue, higher?",
    "output": "In which year was deferred revenue larger?"
  },
  {
    "input": "What was the specific dollar amount of the change in deferred revenue between 2019 and 2018?",
    "output": "What was the change in deferred revenue in 2019 from 2018?"
  },
  {
    "input": "What was the percentage change in deferred revenue in 2019 from 2018?",
    "output": "What is the percentage increase or decrease in deferred revenue from 2018 to 2019?"
  },
  {
    "input": "What was the change in Fair value of share awards vested in 2019 from 2018?",
    "output": "What was the variation in the fair value of share awards that were vested in the year 2019 compared to 2018?"
  },
  {
    "input": "What was the change in Fair value of share awards vested in 2019 from 2018?",
    "output": "What was the difference in fair value of share awards that were granted in 2018 but vested in 2019?"
  },
  {
    "input": "During which years was information regarding the credit agreements provided?",
    "output": "In which years is information related to the credit agreements provided?"
  },
  {
    "input": "What is the least year in which the Interest expense reached its minimum amount?",
    "output": "In which year was the amount of Interest expense the smallest?"
  },
  {
    "input": "When was the least amount of interest expense recorded?",
    "output": "In which year was the amount of Interest expense the smallest?"
  },
  {
    "input": "What was the change in Interest expense in 2019 from 2018?",
    "output": "What was the difference in the amount of money spent on interest payments in 2019 compared to 2018?"
  },
  {
    "input": "What was the percentage increase or decrease in the amount spent on interest in 2019 compared to 2018?",
    "output": "What was the percentage change in Interest expense in 2019 from 2018?"
  },
  {
    "input": "What was the value of purchases that were accumulated or outstanding as of the end of 2019?",
    "output": "What was the amount of Accrued purchases in 2019?"
  },
  {
    "input": "What was the specific amount of Accrued legal and professional fees incurred during the year 2018?",
    "output": "What was the amount of Accrued legal and professional fees  in 2018?"
  },
  {
    "input": "\"How much money was recorded for Accrued legal and professional fees during the year 2018?\"",
    "output": "What was the amount of Accrued legal and professional fees  in 2018?"
  },
  {
    "input": "What was the percentage difference in Accrued roadside assistance claim costs between 2019 and 2018?",
    "output": "What was the change in Accrued roadside assistance claim costs in 2019 from 2018?"
  },
  {
    "input": "\"What was the specific amount charged for the service in the year 2019?\"",
    "output": "What was the service cost in 2019?"
  },
  {
    "input": "\"What was the cost of the service provided in the year 2019?\"",
    "output": "What was the service cost in 2019?"
  },
  {
    "input": "What was the total cost of interest expenses in the year 2018?",
    "output": "What was the  Interest cost  in 2018?"
  },
  {
    "input": "What was the total amount spent on interest expenses in 2018?",
    "output": "What was the  Interest cost  in 2018?"
  },
  {
    "input": "\"In which specific year did the cost of services exceed the cost of services in the other years?\"",
    "output": "In which year was service cost larger?"
  },
  {
    "input": "Which year had a greater service cost compared to other years?",
    "output": "In which year was service cost larger?"
  },
  {
    "input": "What was the difference in the amount spent on interest in 2019 compared to 2018?",
    "output": "What was the change in interest cost in 2019 from 2018?"
  },
  {
    "input": "\"What was the gross revenue generated by overseas operations, excluding any income taxes, in the fiscal year 2018?\"",
    "output": "What was the  Foreign  income before income taxes in 2018?"
  },
  {
    "input": "What was the total income from foreign sources in 2018, before any deductions for income taxes were made?",
    "output": "What was the  Foreign  income before income taxes in 2018?"
  },
  {
    "input": "What are the components under income before income taxes?",
    "output": "What are the individual components that make up the category of income before income taxes?"
  },
  {
    "input": "In which specific year, prior to the implementation of income taxes, did individuals and/or organizations experience the highest level of income?",
    "output": "In which year was income before income taxes the largest?"
  },
  {
    "input": "What is the largest recorded income before income taxes, and in which specific year was it achieved?",
    "output": "In which year was income before income taxes the largest?"
  },
  {
    "input": "In which specific year did the quantity of an item categorized as \"Other\" exceed the quantity of another item, considering all provided information?",
    "output": "In which year was the amount of Other larger?"
  },
  {
    "input": "In which year did the quantity of a different item exceed the quantity of the rest?",
    "output": "In which year was the amount of Other larger?"
  },
  {
    "input": "What was the year-over-year difference in accrued severance and associated expenses between 2018 and 2019?",
    "output": "What was the change in Accrued severance and related costs in 2019 from 2018?"
  },
  {
    "input": "What was the total payment made by the Company in the year 2019?",
    "output": "How much has the Company paid in total in 2019?"
  },
  {
    "input": "What was the total amount paid by the Company in 2019?",
    "output": "How much has the Company paid in total in 2019?"
  },
  {
    "input": "When did the amount of non-cash impairment charges exceed previous years?",
    "output": "In which year was Non-cash impairment charges larger?"
  },
  {
    "input": "Which year had a greater amount of non-cash impairment charges compared to other years?",
    "output": "In which year was Non-cash impairment charges larger?"
  },
  {
    "input": "During which years is the fair value of restricted shares or restricted stock units (RSUs) vested determined?",
    "output": "In which years is the Fair value of restricted shares/RSUs vested calculated?"
  },
  {
    "input": "What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?",
    "output": "What was the difference in the number of restricted shares/RSUs granted in 2019 compared to 2018?"
  },
  {
    "input": "What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?",
    "output": "What was the difference in the number of restricted shares/ RSUs granted in 2019 compared to 2018?"
  },
  {
    "input": "What was the percentage change in Number of restricted shares/ RSUs granted in 2019 from 2018?",
    "output": "What was the percentage change in the number of restricted shares/ RSUs granted from 2018 to 2019?"
  },
  {
    "input": "As of July 1, 2019, what was the total value of stapled securities of NSR listed on the Australian Securities Exchange?",
    "output": "How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?"
  },
  {
    "input": "As of July 1, 2019, what was the total value of NSR stapled securities quoted on the Australian Securities Exchange?",
    "output": "How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?"
  },
  {
    "input": "What would happen if the price of a security dropped by 2.5% and how would this affect the overall market?",
    "output": "What would be the impact if security price decreased by 2.5%?"
  },
  {
    "input": "What is the change in Other intangible assets Opening net book value from 2018 to 2019?",
    "output": "What is the difference in the net book value of Other intangible assets at the beginning of 2018 compared to the beginning of 2019?"
  },
  {
    "input": "What is the change in Other intangible assets Opening net book value from 2018 to 2019?",
    "output": "What is the difference in the opening net book value of Other intangible assets between 2018 and 2019?"
  },
  {
    "input": "How many distinct business segments were there in the years 2019 and 2018?",
    "output": "How many business segments were present in 2019 and 2018?"
  },
  {
    "input": "\"What is the average revenue generated from New Zealand in both 2018 and 2019?\"",
    "output": "What is the average revenue from New Zealand for 2018 and 2019?"
  },
  {
    "input": "What is the average revenue generated from New Zealand in both 2018 and 2019?",
    "output": "What is the average revenue from New Zealand for 2018 and 2019?"
  },
  {
    "input": "In which specific year did the revenue from New Zealand amount to less than 10,000 thousands?",
    "output": "In which year was revenue from New Zealand under 10,000 thousands?"
  },
  {
    "input": "In what specific year did the revenue generated from New Zealand fall below 10,000 thousands (in the same currency)?",
    "output": "In which year was revenue from New Zealand under 10,000 thousands?"
  },
  {
    "input": "What was the total NLA in Brisbane?",
    "output": "What was the total number of Newly Listed Apartments (NLA) in Brisbane?"
  },
  {
    "input": "What is the difference in the NLA between Sunshine Cost and Brisbane?",
    "output": "What are the NLA differences between the Sunshine Coast and Brisbane, and how do they vary from each other in terms of specific aspects?"
  },
  {
    "input": "In which year is the Net investment hedge negative?",
    "output": "Which specific year does the Net investment hedge demonstrate a negative value?"
  },
  {
    "input": "What is the change in the Taxation impact on revaluation from 2018 to 2019?",
    "output": "What is the difference in the taxation impact on revaluation between 2018 and 2019?"
  },
  {
    "input": "What is the change in the Taxation impact on revaluation from 2018 to 2019?",
    "output": "What is the difference in the effect of taxation on revaluation between 2018 and 2019?"
  },
  {
    "input": "What was the share of profit from associates representing NSRs in 2019 and 2018?",
    "output": "What were the percentages of profit from associates, specifically those representing NSRs, for the years 2019 and 2018?"
  },
  {
    "input": "What is the difference in the percentage of profit attributed to associates between 2018 and 2019?",
    "output": "What is the change in the Share of profit from associates from 2018 to 2019?"
  },
  {
    "input": "What is the difference in the percentage of profit allocated to associates between 2018 and 2019?",
    "output": "What is the change in the Share of profit from associates from 2018 to 2019?"
  },
  {
    "input": "When was the year when the closing balance on June 30th was below 11,000 thousand units?",
    "output": "In which year was the Closing balance at 30 June less than 11,000 thousands?"
  },
  {
    "input": "What is the year-over-year change in face value for New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?",
    "output": "What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in face value for current New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?",
    "output": "What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?"
  },
  {
    "input": "What was the assets held for sale under Level 1, Level 2 and Level 3 in 2018?",
    "output": "In 2018, what were the assets classified as held for sale and valued at Level 1, Level 2, and Level 3 according to their respective classifications?"
  },
  {
    "input": "What was the amount of senior secured term loan for fiscal years 2019 and 2018?",
    "output": "What is the value of senior secured term loan for fiscal years 2019 and 2018 respectively?"
  },
  {
    "input": "What is the percentage shift in the amount of finance leases and other financing obligations from fiscal year 2018 to fiscal year 2019?",
    "output": "What is the percentage change in the finance leases and other financing obligations between fiscal years 2018 and 2019?"
  },
  {
    "input": "What is the average finance leases and other financing obligations for fiscal years 2018 and 2019?",
    "output": "What is the mean value of finance leases and other financing obligations for fiscal years 2018 and 2019?"
  },
  {
    "input": "What are the income tax expenses for the years 2019, 2018, and 2017?",
    "output": "What is the total current income tax expense for 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "What is the amount of deferred income tax expense (benefit) recorded for the years 2019, 2018, and 2017, respectively?",
    "output": "What is the Total deferred income tax expense (benefit) for 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "What is the average total income tax expense for the years 2017, 2018, and 2019?",
    "output": "What is the average Total income tax expense for 2019, 2018 and 2017?"
  },
  {
    "input": "What is the average aggregate size of the facilities in Arizona and Washington?",
    "output": "What is the average total size of the facilities located in both Arizona and Washington?"
  },
  {
    "input": "\"How many states in the United States have a total of more than 5 facilities?\"",
    "output": "How many states have more than 5 facilities?"
  },
  {
    "input": "What percentage of the total contractual obligations does the total inventory purchase commitments represent?",
    "output": "What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?"
  },
  {
    "input": "What percentage of the total contractual obligations is constituted by the inventory purchase commitments in the overall inventory?",
    "output": "What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?"
  },
  {
    "input": "\"What specific component within a contractual obligation holds the highest overall value?\"",
    "output": "Which component of contractual obligation has the highest total value?"
  },
  {
    "input": "At December 29, 2017, how many shares were still unvested?",
    "output": "What is the number of shares that are Unvested at December 29, 2017?"
  },
  {
    "input": "What is the length of time that buildings are typically considered useful for?",
    "output": "What is the useful lives of buildings?"
  },
  {
    "input": "What are the estimated useful lives or lifespans of machinery and equipment commonly used in various industries or sectors?",
    "output": "What is the useful lives of Machinery and equipment?"
  },
  {
    "input": "What are the estimated useful lifespans of machinery and equipment? How long can machinery and equipment be expected to remain functional and productive?",
    "output": "What is the useful lives of Machinery and equipment?"
  },
  {
    "input": "What is the average monetary worth of buildings in the years 2018 and 2019?",
    "output": "What is the average value of buildings for 2018 and 2019?"
  },
  {
    "input": "What is the average monetary worth of buildings in the years 2018 and 2019 combined?",
    "output": "What is the average value of buildings for 2018 and 2019?"
  },
  {
    "input": "What is the average adjusted net income from 2017-2019?",
    "output": "\"What is the average adjusted net income for the years 2017, 2018, and 2019?\""
  },
  {
    "input": "What were the values of receivables and inventory in the years 2019 and 2018, respectively?",
    "output": "What is the value of Receivables and inventory for 2019 and 2018 respectively?"
  },
  {
    "input": "What is the value of Accrued expenses for 2019 and 2018 respectively?",
    "output": "What is the amount of Accrued expenses in 2019 and 2018?"
  },
  {
    "input": "What is the value of Self-insurance reserves for 2019 and 2018 respectively?",
    "output": "What are the self-insurance reserve values for the years 2019 and 2018?"
  },
  {
    "input": "What are the average values of receivables and inventory for the years 2019 and 2018?",
    "output": "What is the average value of Receivables and inventory for 2019 and 2018?"
  },
  {
    "input": "What are the average values of Receivables and inventory for the years 2019 and 2018?",
    "output": "What is the average value of Receivables and inventory for 2019 and 2018?"
  },
  {
    "input": "In which year did the Accrued expenses reach the highest level?",
    "output": "Which year has the highest Accrued expenses?"
  },
  {
    "input": "In which specific year did the Accrued expenses reach their highest value?",
    "output": "Which year has the highest Accrued expenses?"
  },
  {
    "input": "Which year recorded the highest Senior Secured Term Loan amount?",
    "output": "Which year has the highest Senior secured term loan?"
  },
  {
    "input": "What information does the table present?",
    "output": "What data is being shown in the table and what details can be derived from it?"
  },
  {
    "input": "What is the change in long-term portion of contingent earn-out liabilities for 2018 and 2019?",
    "output": "What is the difference in the long-term portion of contingent earn-out liabilities between 2018 and 2019?"
  },
  {
    "input": "What were the statutory U.S. Federal tax rates for the years 2019, 2018, and 2017?",
    "output": "What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "What are the respective statutory federal tax rates for the years 2019, 2018, and 2017 in the United States?",
    "output": "What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "How much did the valuation allowance change in 2017?",
    "output": "What is the change in valuation allowance for 2017?"
  },
  {
    "input": "What was the difference in valuation allowance between 2016 and 2017?",
    "output": "What is the change in valuation allowance for 2017?"
  },
  {
    "input": "What has been the average amount of Statutory U.S. Federal tax paid annually for the years 2017, 2018, and 2019?",
    "output": "What is the average Statutory U.S. Federal tax from 2017-2019?"
  },
  {
    "input": "What was the average statutory U.S. federal tax rate for the years 2017-2019?",
    "output": "What is the average Statutory U.S. Federal tax from 2017-2019?"
  },
  {
    "input": "What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?",
    "output": "What is the year-end working capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017, respectively?"
  },
  {
    "input": "What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?",
    "output": "What is the Working Capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017?"
  },
  {
    "input": "What are the average values of Cash and cash equivalents for fiscal years 2019, 2018, and 2017?",
    "output": "What is the average value for the Cash and cash equivalents for fiscal years 2019, 2018 and 2017?"
  },
  {
    "input": "What are the average values of the Availability under asset-based loan facility for the fiscal years 2019, 2018, and 2017?",
    "output": "What is the average value for the Availability under asset-based loan facility for fiscal years 2019, 2018 and 2017?"
  },
  {
    "input": "What were the net income (or net losses) for the years 2019, 2018, and 2017 respectively?",
    "output": "What was the Net (loss) income in 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "What were the respective net losses or incomes for the years 2019, 2018, and 2017?",
    "output": "What was the Net (loss) income in 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "In which year was Adjusted EBITDA less than 20,000 thousands?",
    "output": "\"In which specific year, out of the given years, was the Adjusted EBITDA figure lower than 20,000 thousands?\""
  },
  {
    "input": "In which year was Adjusted EBITDA less than 20,000 thousands?",
    "output": "In which specific year was the Adjusted EBITDA, expressed in thousands, less than 20,000?"
  },
  {
    "input": "What was the percentage increase in the expense incurred for Research and Development in the year 2018 compared to the previous period?",
    "output": "What was the increase in Research and development expense in 2018?"
  },
  {
    "input": "\"What was the percentage increase in Research and Development (R&D) expense during the year 2018 compared to the previous year?\"",
    "output": "What was the increase in Research and development expense in 2018?"
  },
  {
    "input": "What were the average research and development expenses in both 2017 and 2018?",
    "output": "What was the average Research and development expenses in 2017 and 2018?"
  },
  {
    "input": "What were the respective average amounts spent on research and development in 2017 and 2018?",
    "output": "What was the average Research and development expenses in 2017 and 2018?"
  },
  {
    "input": "What was the change in the Net operating loss carryforwards from 2018 to 2019?",
    "output": "What was the numerical difference between the Net operating loss carryforwards in 2018 and 2019?"
  },
  {
    "input": "\"In which specific year did the value of Deferred revenue fall below 2,000?\"",
    "output": "In which year was Deferred revenue less than 2,000?"
  },
  {
    "input": "What is the difference in the amount of accumulated depreciation between March 31, 2018, and March 31, 2019?",
    "output": "What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?"
  },
  {
    "input": "How much did the Accumulated depreciation increase between March 31, 2018 and March 31, 2019?",
    "output": "What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?"
  },
  {
    "input": "What was the percentage change in the General and administrative expenses in 2019 compared to the previous year?",
    "output": "What was the increase in the General and administrative expenses in 2019?"
  },
  {
    "input": "What was the percentage increase in the General and administrative expenses from 2018 to 2019?",
    "output": "What was the increase in the General and administrative expenses in 2019?"
  },
  {
    "input": "\"Who has the authority over the management and administration of the company's operations in a specific country?\"",
    "output": "Which country controls the company's affairs?"
  },
  {
    "input": "\"Who has jurisdiction over the overall management and operations of the company?\"",
    "output": "Which country controls the company's affairs?"
  },
  {
    "input": "What was the change in the U.S. state taxes, net of federal from 2018 to 2019?",
    "output": "What was the difference in the amount of state taxes paid in the United States, after deducting federal taxes, from 2018 to 2019?"
  },
  {
    "input": "What was the change in the U.S. state taxes, net of federal from 2018 to 2019?",
    "output": "What was the difference in the amount of state taxes, after deducting federal taxes, between the years 2018 and 2019 in the United States?"
  },
  {
    "input": "In which year was Tax credits less than 10.0?",
    "output": "When was the year when tax credits were less than 10.0 units?"
  },
  {
    "input": "In which year was Tax credits less than 10.0?",
    "output": "In which specific year did the amount of Tax credits fall below 10.0?"
  },
  {
    "input": "What was the exact amount of share-based compensation expense incurred during the year 2019?",
    "output": "What was the Total share-based compensation expense in 2019?"
  },
  {
    "input": "\"What changes, if any, took place once the IPO concluded?\"",
    "output": "What became effective upon the closing of the IPO?"
  },
  {
    "input": "What were the initial balances in the years 2019 and 2018, respectively?",
    "output": "What was the Beginning balance in 2019 and 2018 respectively?"
  },
  {
    "input": "What were the starting balances for the year 2019 and 2018?",
    "output": "What was the Beginning balance in 2019 and 2018 respectively?"
  },
  {
    "input": "How much did the Company recorded amortization expense for the year ended March 31, 2018?",
    "output": "What was the amount of amortization expense recorded by the Company for the fiscal year ending on March 31, 2018?"
  },
  {
    "input": "How much did the Company recorded amortization expense for the year ended March 31, 2018?",
    "output": "What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2018?"
  },
  {
    "input": "How much did the Company recorded amortization expense for the year ended March 31, 2017?",
    "output": "What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2017?"
  },
  {
    "input": "What are the main sources of liquidity in financial markets and institutions?",
    "output": "What are the principal sources of liquidity?"
  },
  {
    "input": "What was the total value of outstanding letters of credit associated with specific operating leases as of March 31, 2018?",
    "output": "How much was the outstanding letters of credit related to certain operating leases as of March 31, 2018?"
  },
  {
    "input": "What were the revenue retention rates for the years 2019, 2018, and 2017 in chronological order?",
    "output": "What was the Revenue retention rate in 2019, 2018 and 2017 respectively?"
  },
  {
    "input": "What is the average number of customers in total from 2017 to 2019?",
    "output": "What is the average Total customers between 2017-2019?"
  },
  {
    "input": "\"In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?\"",
    "output": "In which year was Adjusted EBITDA less than 20,000 thousands?"
  },
  {
    "input": "In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?",
    "output": "In which year was Adjusted EBITDA less than 20,000 thousands?"
  },
  {
    "input": "What was the amount of Depreciation and amortization expense recorded for the fiscal year that ended on March 31, 2017?",
    "output": "How much was Depreciation and amortization expense for the years ended March 31, 2017?"
  },
  {
    "input": "What was the total amount of Depreciation and amortization expense recorded for the fiscal years ending on March 31, 2017?",
    "output": "How much was Depreciation and amortization expense for the years ended March 31, 2017?"
  },
  {
    "input": "What is the net increase or decrease in the Accumulated Amortization balance from March 31, 2018, to March 31, 2019?",
    "output": "What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?"
  },
  {
    "input": "What is the net increase or decrease in the amount of Accumulated Amortization between March 31, 2018, and March 31, 2019?",
    "output": "What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?"
  },
  {
    "input": "What was the percentage change in the Risk-free interest rate from 2018 to 2019?",
    "output": "What was the change in the Risk-free interest rate from 2018 to 2019?"
  },
  {
    "input": "What was the average level of expected volatility recorded between 2017 and 2019?",
    "output": "What was the average Expected volatility between 2017-2019?"
  },
  {
    "input": "What was the average expected volatility during the period of 2017-2019?",
    "output": "What was the average Expected volatility between 2017-2019?"
  },
  {
    "input": "In which specific year did the grant date fair value per ordinary share fall below 30.0?",
    "output": "In which year was the Grant date fair value per ordinary share less than 30.0?"
  },
  {
    "input": "When was the year when the grant date fair value per ordinary share fell below 30.0?",
    "output": "In which year was the Grant date fair value per ordinary share less than 30.0?"
  },
  {
    "input": "\"What was the specific quantity or value of work that was at the intermediate stage of completion in the year 2018?\"",
    "output": "What was the amount of work in process in 2018?"
  },
  {
    "input": "What was the change in finished goods between 2018 and 2019?",
    "output": "What was the numerical difference in the amount of finished goods produced between the years 2018 and 2019?"
  },
  {
    "input": "What was the specific amount of interest incurred in the year 2018?",
    "output": "What was the interest cost in 2018?"
  },
  {
    "input": "What was the change in interest cost between 2018 and 2019?",
    "output": "By how much did the interest cost change from 2018 to 2019?"
  },
  {
    "input": "For how many years did the actuarial loss surpass $1,000 thousand and receive recognition?",
    "output": "How many years did the recognized actuarial loss exceed $1,000 thousand?"
  },
  {
    "input": "What was the total amount of insurance proceeds received during the fiscal year that ended on August 31, 2019?",
    "output": "What were the insurance proceeds for the fiscal year ended August 31, 2019?"
  },
  {
    "input": "\"What was the total amount of insurance claims paid out during the fiscal year that concluded on August 31, 2019?\"",
    "output": "What were the insurance proceeds for the fiscal year ended August 31, 2019?"
  },
  {
    "input": "What was the specific difference in the amount of Restructuring and related charges incurred by the company in the years 2018 and 2019?",
    "output": "What was the change in Restructuring and related charges between 2018 and 2019?"
  },
  {
    "input": "What were the reductions for tax positions  primarily related to?",
    "output": "\"What was the primary reason for the reductions in tax positions?\""
  },
  {
    "input": "What is the company's definition of working capital and how is it calculated?",
    "output": "How does the company define working capital?"
  },
  {
    "input": "What was the amount of assets in 2019?",
    "output": "What were the total assets in 2019?"
  },
  {
    "input": "What was the value of the total assets in the year 2019?",
    "output": "What were the total assets in 2019?"
  },
  {
    "input": "What is the range of years covered in the table?",
    "output": "What are the years included in the table?"
  },
  {
    "input": "What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?",
    "output": "What was the difference in the amount of current installments of notes payable and long-term debt between the year 2018 and 2019?"
  },
  {
    "input": "What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?",
    "output": "What is the difference in the amount of current installments of notes payable and long-term debt between the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in the stockholders' equity of Total Jabil Inc. from 2018 to 2019?",
    "output": "What was the percentage change in Total Jabil Inc. stockholders’ equity between 2018 and 2019?"
  },
  {
    "input": "What was the specific net revenue figure reported?",
    "output": "What was the net revenue as reported?"
  },
  {
    "input": "What was the operating income as reported?",
    "output": "What was the reported operating income amount?"
  },
  {
    "input": "What was the specific distinction between the reported net revenue and cost of revenue? Please elaborate on the differences observed between these two financial measures.",
    "output": "What was the difference between net revenue and cost of revenue as reported?"
  },
  {
    "input": "What was the distinction between the net revenue and cost of revenue as stated in the report, and can you provide further details to assist in answering?",
    "output": "What was the difference between net revenue and cost of revenue as reported?"
  },
  {
    "input": "What was the operating income balance expressed as a ratio of the amount reported, before considering the implementation of ASU 2014-09?",
    "output": "What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?"
  },
  {
    "input": "What was the ratio of the operating income balance without the adoption of ASU 2014-09 to the amount as reported?",
    "output": "What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?"
  },
  {
    "input": "By what amount did the gross profit increase or decrease from May 2018 to August 2018?",
    "output": "What was the change in gross profit between May 2018 and August 2018?"
  },
  {
    "input": "What are the distinctions between the September 2018 balances of contract assets and contract liabilities? Please provide a comprehensive explanation while keeping all the details intact.",
    "output": "What is the difference between the balance in September 2018 for contract assets and contract liabilities?"
  },
  {
    "input": "What was the percentage change for Other accrued expenses due to adjustments by the new standard?",
    "output": "What was the percentage change specifically for Other accrued expenses as a result of adjustments made under the new standard?"
  },
  {
    "input": "What was the percentage change for Other accrued expenses due to adjustments by the new standard?",
    "output": "What was the percentage change in Other accrued expenses after incorporating adjustments made by the new standard? Please provide the specific percentage change."
  },
  {
    "input": "What is the exact amount of net revenue generated in the fiscal year 2019?",
    "output": "What was the net revenue in 2019?"
  },
  {
    "input": "What was the total revenue generated during the year 2019 after deducting all expenses and costs?",
    "output": "What was the net revenue in 2019?"
  },
  {
    "input": "What was the gross profit in 2018?",
    "output": "What was the gross profit generated in the year 2018?"
  },
  {
    "input": "What was the operating income in 2017?",
    "output": "What was the operating income for the year 2017?"
  },
  {
    "input": "What was the change in gross profit between 2017 and 2018?",
    "output": "What was the difference in gross profit from 2017 to 2018?"
  },
  {
    "input": "What was the change in Prepaid expenses and other current assets due to the adoption of ASU 2014-09?",
    "output": "\"What was the impact on Prepaid expenses and other current assets resulting from the adoption of ASU 2014-09? Please provide details and explain the specific changes that occurred as a result of this accounting standard.\""
  },
  {
    "input": "How many reported segments have a value exceeding $2,000,000?",
    "output": "How many segments as reported exceed $2,000,000 thousand?"
  },
  {
    "input": "What were the items incorporated within the amounts for the fiscal year that concluded on August 31, 2017?",
    "output": "What did the amounts in fiscal year ended August 31, 2017 include?"
  },
  {
    "input": "What expenses or revenues were included in the amounts reported for the fiscal year that ended on August 31, 2017?",
    "output": "What did the amounts in fiscal year ended August 31, 2017 include?"
  },
  {
    "input": "What was the difference in the costs of severance and employee benefits for the company from 2017 to 2018?",
    "output": "What was the change in Employee severance and benefit costs between 2017 and 2018?"
  },
  {
    "input": "Which periods does the table include?",
    "output": "What time periods are encompassed in the table's content?"
  },
  {
    "input": "\"What specific items or transactions were encompassed within the category of acquisitions and adjustments?\"",
    "output": "What did Acquisitions and adjustments include?"
  },
  {
    "input": "What were the variations in acquisitions and adjustments made in fiscal year 2018 between the Electronic Manufacturing Services (EMS) and Document Management Systems (DMS)?",
    "output": "What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?"
  },
  {
    "input": "What are the distinctions in acquisitions and adjustments, specifically pertaining to fiscal year 2018, when comparing EMS (Emergency Medical Services) and DMS (Document Management Systems)?",
    "output": "What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?"
  },
  {
    "input": "What was the balance of EMS as in 2018 as a percentage of the total balance at the end of fiscal 2018?",
    "output": "What percentage of the total balance at the end of fiscal 2018 represented the balance of EMS in 2018?"
  },
  {
    "input": "What categories of income or loss are included in the table's income (loss) figures before accounting for income tax expense?",
    "output": "What types of income (loss) before income tax expense is provided by the table?"
  },
  {
    "input": "What was the foreign income (loss) in 2018?",
    "output": "What was the amount of foreign income or loss for the year 2018?"
  },
  {
    "input": "What was the change in Foreign income (loss) between 2018 and 2019?",
    "output": "What was the difference in the amount of income or loss generated from foreign sources, comparing the years 2018 and 2019?"
  },
  {
    "input": "What was the change in Foreign income (loss) between 2018 and 2019?",
    "output": "What was the difference in the amount of foreign income or loss reported between 2018 and 2019?"
  },
  {
    "input": "\"How many annual periods experienced a Foreign income loss that was greater than $800,000 thousand dollars?\"",
    "output": "How many years did Foreign income (loss) exceed $800,000 thousand?"
  },
  {
    "input": "For how many years was the amount of Foreign income (loss) greater than $800,000 thousand?",
    "output": "How many years did Foreign income (loss) exceed $800,000 thousand?"
  },
  {
    "input": "What was the change in Accumulated benefit obligation between 2018 and 2019?",
    "output": "What was the specific numerical difference in the Accumulated Benefit Obligation (ABO) amount between the fiscal years of 2018 and 2019?"
  },
  {
    "input": "\"What were the specific expenses incurred by the company for employee severance and benefits during the year 2018?\"",
    "output": "What were the Employee severance and benefit costs in 2018?"
  },
  {
    "input": "What were the severance and benefit costs for employees in the year 2018?",
    "output": "What were the Employee severance and benefit costs in 2018?"
  },
  {
    "input": "What was the precise difference in terms of employee severance and benefit costs from 2018 to 2019?",
    "output": "What was the change in Employee severance and benefit costs between 2018 and 2019?"
  },
  {
    "input": "How many years did cash proceeds received exceed $5,000 million?",
    "output": "How many years have the cash proceeds received been above $5,000 million?"
  },
  {
    "input": "How many years did cash proceeds received exceed $5,000 million?",
    "output": "For how many years did the cash proceeds received exceed $5,000 million?"
  },
  {
    "input": "What was the percentage increase or decrease in pre-tax losses on the sale of receivables from 2017 to 2018?",
    "output": "What was the percentage change in Pre-tax losses on sale of receivables between 2017 and 2018?"
  },
  {
    "input": "What was the difference in the amount of money spent on repurchasing shares in 2016 compared to 2017?",
    "output": "What was the change in share repurchases between 2016 and 2017?"
  },
  {
    "input": "What was the percentage increase or decrease in dividend payments from 2018 to 2019?",
    "output": "What was the percentage change in dividends paid between 2018 and 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in dividends paid from 2018 to 2019?",
    "output": "What was the percentage change in dividends paid between 2018 and 2019?"
  },
  {
    "input": "What was the specific topic of discussion in the DMS segment?",
    "output": "What was the DMS segment focused on?"
  },
  {
    "input": "What was the change in the net revenue from EMS between 2018 and 2019?",
    "output": "What was the difference in net revenue generated by EMS between the years 2018 and 2019?"
  },
  {
    "input": "What was the change in the net revenue from EMS between 2018 and 2019?",
    "output": "What was the exact difference in net revenue for EMS between the years 2018 and 2019?"
  },
  {
    "input": "What was the average year-on-year percentage change in total net revenue from 2017-2019?",
    "output": "What was the average annual percentage increase or decrease in the total net revenue from 2017 to 2019?"
  },
  {
    "input": "What was the average year-on-year percentage change in total net revenue from 2017-2019?",
    "output": "What was the average percentage change in net revenue from year to year for the total period between 2017 and 2019?"
  },
  {
    "input": "What is the difference in the balance of Employee Severance and Benefit Costs as of August between 2017 and 2018?",
    "output": "What is the change in the balance as of August in Employee Severance and Benefit Costs between 2017 and 2018?"
  },
  {
    "input": "What were the lease costs in 2017 as a percentage of the total balance in 2017?",
    "output": "\"What was the percentage of lease costs in 2017 compared to the total balance in 2017?\""
  },
  {
    "input": "What were the lease costs in 2017 as a percentage of the total balance in 2017?",
    "output": "\"What proportion of the total balance in 2017 was allocated towards lease costs?\""
  },
  {
    "input": "\"What was the amount of the domestic-federal income tax expense (benefit) recorded for the year 2017?\"",
    "output": "What was the current domestic-federal income tax expense (benefit) in 2017?"
  },
  {
    "input": "What was the amount of domestic-federal income tax expense (benefit) recorded in the financial statements for the year 2017?",
    "output": "What was the current domestic-federal income tax expense (benefit) in 2017?"
  },
  {
    "input": "What was the amount of income tax expense or benefit incurred for domestic operations in 2019?",
    "output": "What was the current domestic-state income tax expense (benefit) in 2019?"
  },
  {
    "input": "In 2019, what was the amount of income tax expense or benefit incurred at the domestic state level?",
    "output": "What was the current domestic-state income tax expense (benefit) in 2019?"
  },
  {
    "input": "What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?",
    "output": "What is the difference in the domestic-state income tax expense (benefit) for the company from 2018 to 2019?"
  },
  {
    "input": "What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?",
    "output": "What was the difference in the domestic-state income tax expense (benefit) of the company from 2018 to 2019?"
  },
  {
    "input": "What were the types of land and the associated improvements that existed in the year 2019?",
    "output": "What were the land and improvements in 2019?"
  },
  {
    "input": "What was the total monetary worth of all buildings in the year 2018?",
    "output": "What was the value of buildings in 2018?"
  },
  {
    "input": "What was the percentage change in total property, plant and equipment between 2018 and 2019?",
    "output": "What was the percentage increase or decrease in the total value of property, plant, and equipment from 2018 to 2019?"
  },
  {
    "input": "Based on what factors or criteria were the fair value estimates for Level 3 determined?",
    "output": "What were the Level 3 fair value estimates based on?"
  },
  {
    "input": "What factors or criteria were taken into consideration when determining the Level 3 fair value estimates?",
    "output": "What were the Level 3 fair value estimates based on?"
  },
  {
    "input": "What was the change in the fair value for the 3.950% Senior Notes between 2018 and 2019?",
    "output": "What was the difference in fair value for the 3.950% Senior Notes from 2018 to 2019?"
  },
  {
    "input": "What is the significance of the amount awarded as shares, which is contingent upon meeting specific performance criteria?",
    "output": "What does the amount for shares granted that are based on the achievement of certain performance criteria represent?"
  },
  {
    "input": "What was the percentage difference in the number of shares from 2018 to 2019 and how do I calculate it accurately?",
    "output": "What was the percentage change in the number of shares between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in Expected dividend yield between 2017 and 2018?",
    "output": "What was the percentage adjustment in the expected dividend yield from 2017 to 2018?"
  },
  {
    "input": "What were the restricted stock units in 2019?",
    "output": "What is the significance of restricted stock units in 2019, and can you provide more details about them?"
  },
  {
    "input": "What were the details and features of the Employee stock purchase plan (ESPP) implemented in 2018?",
    "output": "What was the Employee stock purchase plan in 2018?"
  },
  {
    "input": "What was the change in Other stock-based compensation expenses between 2017 and 2018?",
    "output": "What was the difference in the expenses related to stock-based compensation for Other category between the years 2017 and 2018?"
  },
  {
    "input": "\"What was the difference in the number of restricted stock units awarded by a company in 2018 compared to 2019?\"",
    "output": "What was the change in the restricted stock units between 2018 and 2019?"
  },
  {
    "input": "What was the net change in the number of restricted stock units from 2018 to 2019?",
    "output": "What was the change in the restricted stock units between 2018 and 2019?"
  },
  {
    "input": "What does the key management comprise of?",
    "output": "What is included in key management?"
  },
  {
    "input": "What is the combined value of all shares held in joint ventures as of December 31, 2019?",
    "output": "What is the total value of shares at 31 December 2019 for each joint venture?"
  },
  {
    "input": "What is the precise percentage change in revenue losses experienced by the United Kingdom from the year 2018 to 2019?",
    "output": "What is the percentage change in the revenue losses in UK from 2018 to 2019?"
  },
  {
    "input": "What is the percentage increase or decrease in the revenue losses experienced in the United Kingdom from 2018 to 2019?",
    "output": "What is the percentage change in the revenue losses in UK from 2018 to 2019?"
  },
  {
    "input": "What was the net amount of debt in 2019?",
    "output": "What is the Net debt in 2019?"
  },
  {
    "input": "What was the net debt amount in the year 2019?",
    "output": "What is the Net debt in 2019?"
  },
  {
    "input": "What is the percentage change in the net debt from 2018 to 2019?",
    "output": "What is the percentage increase or decrease in the net debt from 2018 to 2019?"
  },
  {
    "input": "What were the fees payable by the Group's joint ventures in 2019?",
    "output": "What were the specific fees that the Group's joint ventures had to pay in the year 2019?"
  },
  {
    "input": "What were the fees payable by the Group's joint ventures in 2019?",
    "output": "What were the fees that the Group's joint ventures had to pay in 2019? Could you provide details on the specific amounts payable by each joint venture?"
  },
  {
    "input": "What proportion of the total borrowings in 2019, including both the principal and interest, will be due within a period of 5 years?",
    "output": "What is the percentage of borrowings (including interest) that matures over 5 years in the total borrowings in 2019?"
  },
  {
    "input": "What was the net asset value (NAV) of EPRA in 2018?",
    "output": "What is the EPRA NAV in 2018?"
  },
  {
    "input": "What is the precise percentage difference in the overall investment made in joint ventures for the years 2018 and 2019?",
    "output": "What is the percentage change in the total investment in joint ventures from 2018 to 2019?"
  },
  {
    "input": "What was the interest rate protection provided in 2019?",
    "output": "What is the interest rate protection in 2019?"
  },
  {
    "input": "What is the current interest rate protection in 2019 and how does it work?",
    "output": "What is the interest rate protection in 2019?"
  },
  {
    "input": "What is the percentage difference in the fair value of unallocated swaps between 2018 and 2019?",
    "output": "What is the percentage change in the fair value of unallocated swaps from 2018 to 2019?"
  },
  {
    "input": "What was the difference in the level of interest rate protection from 2018 to 2019?",
    "output": "What is the change in interest rate protection between 2018 and 2019?"
  },
  {
    "input": "What is the year-on-year percentage change in the calculation of contingent rents based on tenants' turnover from 2018 to 2019?",
    "output": "What is the percentage change in the amount of contingent rents calculated by reference to tenants' turnover from 2018 to 2019?"
  },
  {
    "input": "In what year will the future minimum lease amounts receivable be higher than usual?",
    "output": "In which year is there  higher future minimum lease amounts receivable?"
  },
  {
    "input": "What were the dividends in respect of these shares have been waived by agreement in 2018?",
    "output": "What was the specific agreement regarding the waiver of dividends on these shares in 2018?"
  },
  {
    "input": "What is the percentage change in the disposals value from 2018 to 2019?",
    "output": "What is the percentage difference in the value of disposals from 2018 to 2019, and how can it be calculated?"
  },
  {
    "input": "What is the current total quantity of units that the Company possesses?",
    "output": "What is the total number of units the Company has?"
  },
  {
    "input": "What is the weighted average exercise price for the outstanding options in the Save As You Earn Scheme as of 31 December 2019?",
    "output": "What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?"
  },
  {
    "input": "What is the calculated average exercise prices of the options that are currently outstanding and can be exercised as of December 31, 2019, under the Save As You Earn Scheme?",
    "output": "What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?"
  },
  {
    "input": "What is the percentage change in the total provided deferred tax provision from 1 January 2018 to 31 December 2019?",
    "output": "What is the precise percentage change in the total amount of deferred tax provision that was provided from 1 January 2018 to 31 December 2019?"
  },
  {
    "input": "What was the total amount of administration expenses incurred in the year 2019?",
    "output": "What is the administration expense in 2019?"
  },
  {
    "input": "What is encompassed in diluted shares?",
    "output": "What do the diluted shares include?"
  },
  {
    "input": "What is the percentage of counterparty #5 exposure in the total exposure in 2019?",
    "output": "What is the proportion of the exposure to counterparty #5 compared to the total exposure in the year 2019?"
  },
  {
    "input": "What is the percentage of counterparty #5 exposure in the total exposure in 2019?",
    "output": "What is the proportion of counterparty #5's exposure compared to the total exposure in the year 2019?"
  },
  {
    "input": "What is the percentage difference in the combined value of cash deposits and derivative financial instrument assets between 2018 and 2019?",
    "output": "What is the percentage change in the sum of cash deposits and derivative financial instrument assets from 2018 to 2019?"
  },
  {
    "input": "What is the percentage difference in the loss before tax, including joint ventures and associates, from 2018 to 2019?",
    "output": "What is the percentage change in the loss before tax, joint ventures and associates from 2018 to 2019?"
  },
  {
    "input": "What is the net exposure to foreign exchange risk (euro) in 2018?",
    "output": "\"What is the total amount of foreign exchange risk (in euros) faced by a company in the year 2018?\""
  },
  {
    "input": "What is the impact on equity attributable to owners of the Group when there is a 10% depreciation in foreign exchange rates from 2018 to 2019? Specifically, how does this affect the negative movement of money to equity?",
    "output": "What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?"
  },
  {
    "input": "What is the impact on the Group's owners' equity when foreign exchange rates depreciate by 10% between 2018 and 2019?",
    "output": "What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?"
  },
  {
    "input": "What is the percentage difference in the euro amount drawn from 2018 to 2019?",
    "output": "What is the percentage change in the amount drawn in euros from 2018 to 2019?"
  },
  {
    "input": "What is the percentage increase or decrease in the euro amount withdrawn between 2018 and 2019?",
    "output": "What is the percentage change in the amount drawn in euros from 2018 to 2019?"
  },
  {
    "input": "\"In which specific year did the total debt reach its highest level, and is it greater than the total debt in any other year?\"",
    "output": "In which year is there a greater total debt?"
  },
  {
    "input": "In which year does the total debt amount exceed or surpass the debt levels of other years?",
    "output": "In which year is there a greater total debt?"
  },
  {
    "input": "What is the percentage change in the total debt from 2018 to 2019?",
    "output": "What is the percentage increase or decrease in the total debt from 2018 to 2019?"
  },
  {
    "input": "\"What were the average salaries for administrative positions in fiscal years 2019 and 2018?",
    "output": "What is the average Administrative salaries for fiscal 2019 and 2018?"
  },
  {
    "input": "\"What was the average annual salary for administrative employees in fiscal years 2019 and 2018?",
    "output": "What is the average Administrative salaries for fiscal 2019 and 2018?"
  },
  {
    "input": "What is the weekly processing capacity comparison between the plants located in Laurel, Mississippi and Collins, Mississippi?",
    "output": "What is the difference in capacity per week between the processing plants at Laurel, Mississippi and Collins, Mississippi? "
  },
  {
    "input": "What are the average amounts for prepaid insurance in both fiscal years 2019 and 2018?",
    "output": "What is the average Prepaid insurance for fiscal years 2019 and 2018?"
  },
  {
    "input": "What is the difference in the amount of Total prepaid expenses recorded in the financial statements for fiscal years 2019 and 2018? I need to determine the change in prepaid expenses for these two specific years.",
    "output": "What is the change in Total prepaid expenses between fiscal years 2019 and 2018?"
  },
  {
    "input": "What is the difference in the total amount of prepaid expenses between fiscal years 2019 and 2018?",
    "output": "What is the change in Total prepaid expenses between fiscal years 2019 and 2018?"
  },
  {
    "input": "What is the difference in the statutory income tax rates between fiscal years 2019 and 2018, and how does this impact income taxes?",
    "output": "What is the change in Income taxes at statutory rate between fiscal years 2019 and 2018?"
  },
  {
    "input": "What is the net sales from Fresh, vacuum-sealed chicken for fiscal years 2019 to 2017 respectively?",
    "output": "What were the net sales of Fresh, vacuum-sealed chicken for fiscal years 2019, 2018, and 2017?"
  },
  {
    "input": "What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?",
    "output": "What are the net sales figures for Fresh, chill-packed chicken for fiscal years 2019, 2018, and 2017, respectively?"
  },
  {
    "input": "What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?",
    "output": "How much revenue was generated from the sales of Fresh, chill-packed chicken for the fiscal years of 2019, 2018, and 2017, respectively?"
  },
  {
    "input": "What is the net sales from Fresh, ice-packed chicken for fiscal years 2019 to 2017 respectively?",
    "output": "What were the net sales of Fresh, ice-packed chicken for fiscal years 2019, 2018, and 2017?"
  },
  {
    "input": "What was the purpose of the company's operating leases, and how did they contribute to the company's operations or financial performance?",
    "output": "What was the company's operating leases for?"
  },
  {
    "input": "What is the discrepancy in the overall amount when comparing revenues gained from revolving loan facility fees to expenses incurred from operating leases?",
    "output": "What was the difference in the total between Revolving loan facility fees and operating leases? "
  },
  {
    "input": "What was the change in the Net cash used in investing activities between 2017 and 2019?",
    "output": "What was the difference in the amount of net cash used in investing activities from 2017 to 2019?"
  },
  {
    "input": "What was the percentage change in the Net cash used in financing activities between 2018 and 2019?",
    "output": "What was the percentage difference in the amount of cash used in financing activities from 2018 to 2019?"
  },
  {
    "input": "Which years does the table provide information for the company's Selected Consolidated Statements of Operations Data?",
    "output": "For which specific time periods does the table present the company's Selected Consolidated Statements of Operations Data?"
  },
  {
    "input": "What was the exact amount of revenue generated in the year 2015?",
    "output": "What was the total revenue in 2015?"
  },
  {
    "input": "What was the specific amount of operating profit generated in the year 2017?",
    "output": "What was the operating profit in 2017?"
  },
  {
    "input": "What was the exact amount of operating profit earned during the year 2017?",
    "output": "What was the operating profit in 2017?"
  },
  {
    "input": "What is the precise percentage difference in operating profit from 2015 to 2016?",
    "output": "What was the percentage change in operating profit between 2015 and 2016?"
  },
  {
    "input": "What was the percentage increase or decrease in operating profit from 2015 to 2016?",
    "output": "What was the percentage change in operating profit between 2015 and 2016?"
  },
  {
    "input": "What was the cash used in Software in 2019?",
    "output": "How much money was spent on Software in the year 2019?"
  },
  {
    "input": "What was the net income in 2019?",
    "output": "What was the exact net income amount for the fiscal year of 2019?"
  },
  {
    "input": "What was the specific amount of interest that was recorded as an expense in the year 2018?",
    "output": "What was the interest expense in 2018?"
  },
  {
    "input": "What is the amount of money paid as interest in 2018?",
    "output": "What was the interest expense in 2018?"
  },
  {
    "input": "For how many consecutive years has the net income exceeded $100,000 thousand?",
    "output": "How many years did net income exceed $100,000 thousand?"
  },
  {
    "input": "What was the change in the interest expense between 2017 and 2018?",
    "output": "What were the differences in the amount of interest expense incurred in 2017 and 2018?"
  },
  {
    "input": "How much was the change in fair value of the company's servicing asset that was included in the fees charged for servicing?",
    "output": "How much was the included change in fair value of the company's servicing asset included in its servicing fees?"
  },
  {
    "input": "For how many consecutive years have Interchange fees surpassed $50,000?",
    "output": "How many years did Interchange fees exceed $50,000 thousand?"
  },
  {
    "input": "How many years have the Interchange fees been over $50,000 thousand?",
    "output": "How many years did Interchange fees exceed $50,000 thousand?"
  },
  {
    "input": "What was the difference in the fees charged for transactions in 2017 compared to 2019? How did the transaction fees change between 2017 and 2019?",
    "output": "What was the change in the transaction fees between 2017 and 2019?"
  },
  {
    "input": "What was the percentage change in the total revenue between 2018 and 2019?",
    "output": "What is the percentage increase or decrease in the total revenue from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in the total revenue between 2018 and 2019?",
    "output": "What was the percentage difference in the overall revenue from 2018 to 2019?"
  },
  {
    "input": "How much did the company invest in partnerships during the year 2018?",
    "output": "What was the company's investment in partnership in 2018?"
  },
  {
    "input": "How much money did the company invest in partnership during the year 2018?",
    "output": "What was the company's investment in partnership in 2018?"
  },
  {
    "input": "What is the significance and meaning of the number of Settlements?",
    "output": "What does the amount of Settlements represent?"
  },
  {
    "input": "What is the significance or meaning of the number of Settlements?",
    "output": "What does the amount of Settlements represent?"
  },
  {
    "input": "How many years did the face value of term loan exceed $300,000 thousand?",
    "output": "For how many years has the term loan been valued above $300,000 thousand?"
  },
  {
    "input": "How many years did the face value of term loan exceed $300,000 thousand?",
    "output": "How many years has the face value of the term loan been consistently higher than $300,000 thousand?"
  },
  {
    "input": "What was the change in the Loan Servicing Portfolio between 2017 and 2018?",
    "output": "What was the difference in the size of the Loan Servicing Portfolio from 2017 to 2018?"
  },
  {
    "input": "What was the total costs and expenses?",
    "output": "What was the overall amount of costs and expenses associated with the given context or situation?"
  },
  {
    "input": "What was the exact value of the restricted cash in the year 2017?",
    "output": "What was the amount of restricted cash in 2017?"
  },
  {
    "input": "\"What is the range of years covered in the table?\"",
    "output": "Which years does the table provide?"
  },
  {
    "input": "What was the change in Cash and cash equivalents between 2018 and 2019?",
    "output": "What was the difference in the amount of Cash and cash equivalents between 2018 and 2019?"
  },
  {
    "input": "For how long did the amount of Restricted Cash surpass $200,000 thousand?",
    "output": "How many years did Restricted Cash exceed $200,000 thousand?"
  },
  {
    "input": "What was the total quantity of Furniture sold or produced in the year 2018?",
    "output": "What was the amount of Furniture in 2018?"
  },
  {
    "input": "What was the total quantity or number of furniture items in the year 2018?",
    "output": "What was the amount of Furniture in 2018?"
  },
  {
    "input": "What was the total quantity of computer hardware devices manufactured or sold globally in the year 2019?",
    "output": "What was the amount of Computer hardware in 2019?"
  },
  {
    "input": "How much computer hardware was present in the year 2019?",
    "output": "What was the amount of Computer hardware in 2019?"
  },
  {
    "input": "What was the change in the amount of Software between 2018 and 2019?",
    "output": "What was the difference in the quantity of Software from 2018 to 2019, specifically looking at the change in amount?"
  },
  {
    "input": "What was the change in the amount of Software between 2018 and 2019?",
    "output": "How much did the amount of Software change from 2018 to 2019?"
  },
  {
    "input": "What was the amount of Receipts in 2017?",
    "output": "How many receipts were recorded in the year 2017?"
  },
  {
    "input": "What was the amount of Receipts in 2017?",
    "output": "\"How many total receipts were recorded in the year 2017?\""
  },
  {
    "input": "What was the change in the amount of Settlements between 2017 and 2019?",
    "output": "What was the net difference in the number of Settlements from 2017 to 2019?"
  },
  {
    "input": "What was the change in the amount of Settlements between 2017 and 2019?",
    "output": "What was the difference in the number of Settlements from 2017 to 2019 and how did it change over the two-year period?"
  },
  {
    "input": "What was the exact amount of revenue generated in the year 2018?",
    "output": "What was the total revenue in 2018?"
  },
  {
    "input": "What is the total amount of money earned in 2018?",
    "output": "What was the total revenue in 2018?"
  },
  {
    "input": "\"What was the specific financial figure measuring earnings from regular business operations, known as operating profit, during the year 2019?\"",
    "output": "What was the operating profit in 2019?"
  },
  {
    "input": "What was the operating profit in the first quarter?",
    "output": "What is the exact amount of operating profit recorded in the first quarter of the fiscal year?"
  },
  {
    "input": "What was the specific amount of net income generated in the third quarter?",
    "output": "What was the net income in the third quarter?"
  },
  {
    "input": "What was the percentage increase or decrease in the basic earnings per share of Class A common stock from the first quarter to the second quarter?",
    "output": "What was the percentage change in the basic earnings per share of Class A common stock between the first and second quarter?"
  },
  {
    "input": "\"What is the range of years covered in the table?\"",
    "output": "Which years does the table provide?"
  },
  {
    "input": "What was the difference in the amount of net income recorded between the years 2017 and 2018?",
    "output": "What was the change in net income between 2017 and 2018?"
  },
  {
    "input": "\"What is the meaning and significance of net fair value changes recognized in other gains (losses)?\"",
    "output": "What does the net Fair value changes recognized in other gains (losses) represent?"
  },
  {
    "input": "What is the representation of net fair value changes recognized in other gains (losses)?",
    "output": "What does the net Fair value changes recognized in other gains (losses) represent?"
  },
  {
    "input": "What was the beginning balance in 2018?",
    "output": "\"The starting balance for the year 2018, specifically referring to financial records or accounts, is requested. Please provide the initial amount present at the beginning of that year.\""
  },
  {
    "input": "Which years did the ending balance exceed $3,000 thousand?",
    "output": "In which years was the ending balance higher than $3,000 thousand?"
  },
  {
    "input": "What is the percentage of fair value time deposits compared to the overall fair value of marketable securities?",
    "output": "What percentage of the total fair value marketable securities is made up of fair value time deposits?"
  },
  {
    "input": "\"What is the current interest rate specified in the credit facility agreement that governs our credit arrangement?\"",
    "output": "What is the interest rate on the credit facility agreement?"
  },
  {
    "input": "\"What is the specific interest rate specified in the credit facility agreement that governs our borrowing terms?\"",
    "output": "What is the interest rate on the credit facility agreement?"
  },
  {
    "input": "What factors contributed to the increase in research and development expenses in 2019?",
    "output": "What are the reasons for higher research and development expense in 2019?"
  },
  {
    "input": "What is the net difference in research and development expense between 2019 and 2017?",
    "output": "What is the exact amount of change in research and development expense between the fiscal years of 2019 and 2017?"
  },
  {
    "input": "How much were the marketable securities worth in 2019?",
    "output": "What was the amount of marketable securities in 2019?"
  },
  {
    "input": "What was the precise value of marketable securities held in the year 2019?",
    "output": "What was the amount of marketable securities in 2019?"
  },
  {
    "input": "What is the difference in the total amount of shareholders' equity between the years 2018 and 2019?",
    "output": "What is the change in total shareholders' equity from 2018 to 2019?"
  },
  {
    "input": "How much did the total shareholders' equity change between 2018 and 2019?",
    "output": "What is the change in total shareholders' equity from 2018 to 2019?"
  },
  {
    "input": "How does the net (loss) income of 2019 compare to the net (loss) income of 2018?",
    "output": "What is the difference between Net (loss) income in 2018 and 2019?"
  },
  {
    "input": "What was the exact percentage increase in gross profit from the year 2018 to the year 2019?",
    "output": "What was the percentage increase in gross profit from 2018 to 2019?"
  },
  {
    "input": "What are the different components of expenses that need to be considered when starting production?",
    "output": "What are components of production start-up expense?"
  },
  {
    "input": "What is the total change in expenditure for initiating production between the years 2019 and 2017?",
    "output": "What is the net difference in production start-up expense between 2019 and 2017?"
  },
  {
    "input": "What is the difference in net sales amount in 2019 and 2018?",
    "output": "What is the difference in the net sales amount between the years 2019 and 2018? Please provide details and figures to compare the net sales amounts of these two specific years."
  },
  {
    "input": "What is the value of foreign government obligations as of December 31, 2019?",
    "output": "What is the total amount of foreign government obligations recorded as of December 31, 2019?"
  },
  {
    "input": "What is the comparative value of money market funds and foreign debt as of December 31, 2019, and how do they differ?",
    "output": "What is the difference in the value of money market funds and foreign debt as of December 31, 2019?"
  },
  {
    "input": "What is the increase in cash provided by operating activities from 2018 to 2019?",
    "output": "How much did the cash provided by operating activities increase from 2018 to 2019?"
  },
  {
    "input": "What is the difference in net cash and cash equivalents between 2019 and 2018?",
    "output": "What is the change in net cash and cash equivalents between the fiscal years 2019 and 2018?"
  },
  {
    "input": "\"What specific items or responsibilities are typically considered as part of other obligations?\"",
    "output": "What are included in other obligations?"
  },
  {
    "input": "What does the category of \"other obligations\" encompass and include?",
    "output": "What are included in other obligations?"
  },
  {
    "input": "Why was $72.2 million of unrecognized tax benefits excluded?",
    "output": "Why were the $72.2 million in unrecognized tax benefits excluded from something?"
  },
  {
    "input": "What percentage of the total contractual obligations is made up of total long-term debt obligations?",
    "output": "What is the proportion of the overall contractual obligations that consists specifically of the total amount owed in long-term debt obligations?"
  },
  {
    "input": "What is the distinction between the total amount paid in interest and the total sum of operating lease obligations?",
    "output": "What difference between total interest payments and total operating lease obligations?"
  },
  {
    "input": "What is the interest rate for the term loan that I am inquiring about?",
    "output": "What is the interest rate of the term loan?"
  },
  {
    "input": "What are the differences in the amounts of notes receivable recorded in 2018 and 2019?",
    "output": "What is the difference between notes receivables from 2018 to 2019?"
  },
  {
    "input": "What is the precise percentage by which the total prepaid expenses and other current assets increased from 2018 to 2019?",
    "output": "What is the percentage increase in total prepaid expenses and other current assets from 2018 to 2019?"
  },
  {
    "input": "How are the accounting procedures used to record changes in fair value of hedging instruments?",
    "output": "How are changes in fair value of hedging instruments accounted for?"
  },
  {
    "input": "What are the types of derivative instruments included in the total for other liabilities on a financial statement?",
    "output": "What is the total derivate instruments for other liabilities?"
  },
  {
    "input": "What are the types of derivative instruments used to measure the total value of other liabilities?",
    "output": "What is the total derivate instruments for other liabilities?"
  },
  {
    "input": "What is the ratio of the total derivatives classified as hedging instruments to the total derivatives not classified as hedging instruments under prepaid expenses and other current assets?",
    "output": "Under prepaid expenses and other current assets, what is the ratio of the total derivates designated as hedging instruments to those not designated as hedging instruments?"
  },
  {
    "input": "What was the increase in machinery and equipment from 2018 to 2019?",
    "output": "What was the percentage change in the value of machinery and equipment between 2018 and 2019?"
  },
  {
    "input": "What was the increase in machinery and equipment from 2018 to 2019?",
    "output": "What percentage growth was observed in the value of machinery and equipment between the years 2018 and 2019?"
  },
  {
    "input": "What are the differences in land statistics comparing the year 2018 and 2019?",
    "output": "What is the difference between land from 2018 to 2019?"
  },
  {
    "input": "What are the discrepancies in land conditions between the years 2018 and 2019?",
    "output": "What is the difference between land from 2018 to 2019?"
  },
  {
    "input": "What is the net change in selling, general, and administrative expense between the years 2019 and 2017?",
    "output": "What is the net difference in selling, general and administrative expense between 2019 and 2017?"
  },
  {
    "input": "What is the current percentage shareholding of Tata Sons Private Limited and Life Insurance Corporation of India, and how does it differ between the two entities?",
    "output": "What is the difference in percentage shareholding between Tata Sons Private Limited and Life Insurance Corporation of India?"
  },
  {
    "input": "What is the disparity in the quantity of equity shares possessed by Promoters and Insurance Companies?",
    "output": "What is the difference between number of equity shares held between Promoters and Insurance Companies?"
  },
  {
    "input": "How many shareholding accounts were there as of March 31, 2019?",
    "output": "What is the number of existing shareholding accounts as on March 31, 2019?  "
  },
  {
    "input": "As of March 31, 2019, what is the overall sum of all assets owned by someone or something?",
    "output": "What is the grand total of holdings as on March 31, 2019?"
  },
  {
    "input": "What is the overall value of assets and investments held as of March 31, 2019?",
    "output": "What is the grand total of holdings as on March 31, 2019?"
  },
  {
    "input": "What is the difference in the number of holdings within the share categories of '1-100' and '101-500'? How do the share categories of '1-100' and '101-500' differ in terms of the number of holdings they encompass?",
    "output": "What is the difference in holdings between the share categories of '1-100' and '101-500'?"
  },
  {
    "input": "What is the basis for consolidating shareholdings and how does it affect the overall company structure?",
    "output": "On what basis is shareholding consolidated?"
  },
  {
    "input": "What is the basis for the consolidation of shareholding?",
    "output": "On what basis is shareholding consolidated?"
  },
  {
    "input": "\"What is the proportion of unsecured loans out of the total debt at the start of the financial year?\"",
    "output": "At the beginning of the financial year, what percentage of total indebtedness is made up of unsecured loans?"
  },
  {
    "input": "What was the change in the quantity of shares transferred from 2011 to 2012 into the Investor Education and Protection Fund (IEPF)?",
    "output": "What is the difference in number of shares transferred to IEPF from 2011 to 2012?"
  },
  {
    "input": "What is the total remuneration for Ramakrishnan V?",
    "output": "What is the complete amount of compensation that Ramakrishnan V receives?"
  },
  {
    "input": "What is the total remuneration for Ramakrishnan V?",
    "output": "What is the total amount of money received as payment or compensation for services rendered by Ramakrishnan V?"
  },
  {
    "input": "What is the monetary worth of the stock option that the Company Secretary receives?",
    "output": "What is the value of Stock Option given to the Company Secretary?"
  },
  {
    "input": "What is the monetary worth or amount of Stock Option that has been granted or allocated to the Company Secretary as part of their compensation or benefits package?",
    "output": "What is the value of Stock Option given to the Company Secretary?"
  },
  {
    "input": "What is the specific value or percentage of commission that is usually given to the Chief Financial Officer (CFO) of a company?",
    "output": "What is the value of Commission given to the Chief Financial Officer?"
  },
  {
    "input": "\"What is the specific amount or percentage of commission that is typically awarded to the Chief Financial Officer (CFO) for their role?\"",
    "output": "What is the value of Commission given to the Chief Financial Officer?"
  },
  {
    "input": "What distinguishes the CFO from the Company Secretary in terms of Others and Allowances? Clarify and elaborate on the differences between these two roles regarding their responsibilities, remunerations, and any additional benefits they may receive.",
    "output": "What is the difference in Others, Allowances between the CFO and Company Secretary?"
  },
  {
    "input": "Which key managerial personnel had the highest total remuneration?",
    "output": "\"Who among the key managerial personnel received the highest total remuneration?\""
  },
  {
    "input": "How does the ratio of median remuneration between the two Executive Directors differ?",
    "output": "What is the difference in ratio to median remuneration between the two Executive Directors?"
  },
  {
    "input": "\"What is the difference in the amount of money received from sales or services between the years 2018 and 2019?\"",
    "output": "What is the change in revenue between 2018 and 2019?"
  },
  {
    "input": "What is the difference in the amount of money earned by the company between the years 2018 and 2019?",
    "output": "What is the change in revenue between 2018 and 2019?"
  },
  {
    "input": "What portion of total unconsolidated revenue in FY 2019 constitutes of export revenue?",
    "output": "What percentage of the total unconsolidated revenue for the fiscal year 2019 is comprised of revenue from exports?"
  },
  {
    "input": "What portion of total unconsolidated revenue in FY 2018 constitutes of export revenue?",
    "output": "What percentage of the overall unconsolidated revenue for the fiscal year 2018 is derived from export sales?"
  },
  {
    "input": "What is the difference in the amount of money earned from foreign exchanges between the fiscal years of 2018 and 2019?",
    "output": "What is the change in foreign exchange earnings between FY 2018 and FY 2019?"
  },
  {
    "input": "What is the change in Cost, Insurance and Freight (CIF) value of imports from FY 2018 to FY 2019?",
    "output": "How has the Cost, Insurance and Freight (CIF) value of imports changed from FY 2018 to FY 2019?"
  },
  {
    "input": "What is the specific amount of money charged as a sitting fee for N Chandrasekaran?",
    "output": "What is the sitting fee of N Chandrasekaran?"
  },
  {
    "input": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?",
    "output": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian, and in what context are these fees being compared or analyzed?"
  },
  {
    "input": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?",
    "output": "What is the difference in the sitting fees charged by O P Bhatt and Aarthi Subramanian?"
  },
  {
    "input": "What is the percentage change in pre-tax margin between 2017 and 2018?",
    "output": "What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018"
  },
  {
    "input": "What is the percentage change in Pre-tax margin between 2017 and 2018?",
    "output": "What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018"
  },
  {
    "input": "What does Net Income include?",
    "output": "\"What elements are included in the calculation of Net Income?\""
  },
  {
    "input": "What is the increase / (decrease) in revenue from 2018 to 2019?",
    "output": "How much did the revenue change from 2018 to 2019?"
  },
  {
    "input": "What is the percentage increase / (decrease) in Current liabilities from 2018 to 2019?",
    "output": "What is the percentage change in Current Liabilities between 2018 and 2019?"
  },
  {
    "input": "What caused the decrease in the actual shares?",
    "output": "What factors led to the decrease in the current number of shares held by individuals or entities?"
  },
  {
    "input": "What was the percentage change in Earnings per Share of common stock from continuing operations (basic) compared to the previous period?",
    "output": "What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?"
  },
  {
    "input": "What was the percentage change in the basic earnings per share of common stock from continuing operations?",
    "output": "What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?"
  },
  {
    "input": "How did the performance of hardware platforms compare from year to year?",
    "output": "How was the performance of hardware platforms year to year"
  },
  {
    "input": "What does the Pre-tax income performance implied? ",
    "output": "What is the meaning of Pre-tax income performance?"
  },
  {
    "input": "What was the pre-tax margin of 2019?",
    "output": "\"What was the pre-tax margin percentage for the year 2019?\""
  },
  {
    "input": "What was the pre-tax margin of 2019?",
    "output": "\"What was the specific pre-tax margin for the fiscal year of 2019?\""
  },
  {
    "input": "What was the External gross profit margin in 2019?",
    "output": "What was the gross profit margin from sources external to the company for the year 2019?"
  },
  {
    "input": "What is the average of Balance at January 1?",
    "output": "What is the average balance as of January 1st for all accounts (or customers, if applicable)?"
  },
  {
    "input": "What is the average balance amount as of December 31st?",
    "output": "What is the average of Balance at December 31?"
  },
  {
    "input": "What is the average balance amount recorded on December 31st for a specific period of time?",
    "output": "What is the average of Balance at December 31?"
  },
  {
    "input": "What is the increase / (decrease) in the net capitalized software from 2018 to 2019?",
    "output": "What is the change in the net capitalized software amount from 2018 to 2019?"
  },
  {
    "input": "What was the percentage change in pre-tax income from 2018 to 2019?",
    "output": "What was the increase / (decrease) in the Pre-tax income from 2018 to 2019?"
  },
  {
    "input": "What is the percentage change in gross profit for External Operating Systems Software from 2018 to 2019?",
    "output": "What is the increase / (decrease) in the External Operating Systems Software gross profit from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase / (decrease) in total short term debt?",
    "output": "What was the percentage change in the total amount of short term debt?"
  },
  {
    "input": "What was the percentage change in the pre-tax margin from 2018 to 2019?",
    "output": "What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?"
  },
  {
    "input": "What percentage change in the pre-tax margin occurred between 2018 and 2019?",
    "output": "What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?"
  },
  {
    "input": "What is the meaning or significance of the launch expense?",
    "output": "What does launch expense represent?"
  },
  {
    "input": "How many types of expenses are listed in the table?",
    "output": "How many categories of expenses are included in the table?"
  },
  {
    "input": "What was the average adjusted EBITDA for 2018 and 2019?",
    "output": "What was the average amount of adjusted EBITDA for both the years 2018 and 2019?"
  },
  {
    "input": "\"What is the chronological order of the gross profit for each financial year end as depicted in the table?\"",
    "output": "What is the gross profit for each financial year end shown in the table (in chronological order)?"
  },
  {
    "input": "What was the year with the highest value for other financial expenses?",
    "output": "In which year was the amount of other financial expenses the largest?"
  },
  {
    "input": "What was the net difference in the overall financial expenses incurred during the year 2019 as compared to 2018?",
    "output": "What was the change in the total financial expenses in 2019 from 2018?"
  },
  {
    "input": "What was the sum of all financial costs incurred in the year 2019?",
    "output": "What is the total amount of financial expenses in 2019?"
  },
  {
    "input": "What was the specific year when the amount of Current lease liabilities was greater than in any other year?",
    "output": "In which year was the amount of Current lease liabilities larger?"
  },
  {
    "input": "When did the amount of Current lease liabilities exceed the previous year's amount?",
    "output": "In which year was the amount of Current lease liabilities larger?"
  },
  {
    "input": "What is the total financial income for 2019?",
    "output": "What was the overall financial income for the year 2019?"
  },
  {
    "input": "What was the total number of financial items in the year 2019?",
    "output": "What is the amount of total financial items in 2019?"
  },
  {
    "input": "What is the total number of financial items recorded in the year 2019?",
    "output": "What is the amount of total financial items in 2019?"
  },
  {
    "input": "What are the primary categories that are examined within the Financial Items section of the table?",
    "output": "What are the main categories analyzed under Financial Items in the table?"
  },
  {
    "input": "In which year was the amount of financial income the smallest?",
    "output": "What is the year with the minimum financial income amount?"
  },
  {
    "input": "What was the change in the Average invested capital less average impairment in 2019 from 2018?",
    "output": "What was the difference in the average amount of capital invested excluding average impairment between 2019 and 2018?"
  },
  {
    "input": "What was the change in the Average invested capital less average impairment in 2019 from 2018?",
    "output": "What was the difference in the average amount of money invested minus the average amount of impairments in 2019 compared to 2018?"
  },
  {
    "input": "What was the percentage change in retained earnings in 2019 from 2018?",
    "output": "What was the precise percentage difference in retained earnings between 2018 and 2019, specifically referring to the change observed in 2019?"
  },
  {
    "input": "What was the percentage change in retained earnings in 2019 from 2018?",
    "output": "What was the percentage increase or decrease in the amount of retained earnings from the year 2018 to 2019?"
  },
  {
    "input": "How is Total Cost of Ownership (TCE) earnings utilized as a widely accepted performance measure for the shipping industry?",
    "output": "How is TCE earnings used as a standard shipping industry performance measure?"
  },
  {
    "input": "What is the specific year when the revenue reached its maximum value?",
    "output": "In which year was Revenue the largest?"
  },
  {
    "input": "What items were included in the freight receivables as of 2019?",
    "output": "As of 2019, what did freight receivables include?"
  },
  {
    "input": "What was included in the category of freight receivables as of 2019?",
    "output": "As of 2019, what did freight receivables include?"
  },
  {
    "input": "What is the determining basis for the calculation and establishment of an allowance for expected credit loss?",
    "output": "What is the making of allowance for expected credit loss based on?"
  },
  {
    "input": "What is the basis for determining the provision for expected credit loss?",
    "output": "What is the making of allowance for expected credit loss based on?"
  },
  {
    "input": "What is the specific year when the total gross freight receivables reached their highest amount?",
    "output": "In which year was the amount of total gross freight receivables the largest?"
  },
  {
    "input": "What was the net increase or decrease in the total gross freight receivables between 2018 and 2019?",
    "output": "What was the change in the total net gross freight receivables in 2019 from 2018?"
  },
  {
    "input": "What information can be derived in note 16?",
    "output": "What data or details can be obtained from note 16?"
  },
  {
    "input": "What information can be derived in note 16?",
    "output": "What kind of information can be obtained from note 16?"
  },
  {
    "input": "What was the change in the carrying amount as of 31 December from 2018 to 2019?",
    "output": "What was the difference in the carrying amount as of December 31 between the years 2018 and 2019? Please provide the specific change in the carrying amount."
  },
  {
    "input": "What are the specific components present in the table that are utilized for liquidity calculations?",
    "output": "What are the components in the table used to calculate liquidity?"
  },
  {
    "input": "What was the difference in liquidity levels between 2019 and 2018, and how did it change over the course of the year?",
    "output": "What was the change in liquidity in 2019 from 2018?"
  },
  {
    "input": "What does TORM have leases for?",
    "output": "What types of leases does TORM currently have?"
  },
  {
    "input": "How are leases represented on the balance sheet and what specific details are included for each lease?",
    "output": "How is each lease reflected on the balance sheet?"
  },
  {
    "input": "\"What is the asset type that experiences the highest rate of depreciation in a year?\"",
    "output": "Which asset type has the largest depreciation of the year?"
  },
  {
    "input": "What was the difference in the amount of gross profit in 2019 compared to 2018?",
    "output": "What was the change in gross profit in 2019 from 2018?"
  },
  {
    "input": "What was the exact percentage increase or decrease in the gross profit from 2018 to 2019?",
    "output": "What was the percentage change in gross profit in 2019 from 2018?"
  },
  {
    "input": "What was the percentage increase or decrease in gross profit from 2018 to 2019?",
    "output": "What was the percentage change in gross profit in 2019 from 2018?"
  },
  {
    "input": "\"What specific details or data can be found in Note 21 of the provided information?\"",
    "output": "What information is provided in Note 21?"
  },
  {
    "input": "What was the percentage change in interest rate swaps between the year 2018 and 2019?",
    "output": "What was the change in interest rate swaps from 2018 to 2019?"
  },
  {
    "input": "How did interest rate swaps change from 2018 to 2019?",
    "output": "What was the change in interest rate swaps from 2018 to 2019?"
  },
  {
    "input": "What was the percentage difference in the interest rate swaps between 2018 and 2019? I would like to know the rate at which the interest rate swaps changed between these two years.",
    "output": "What was the percentage change in interest rate swaps from 2018 to 2019?"
  },
  {
    "input": "What was the precise percentage difference in the interest rate swap market between 2018 and 2019?",
    "output": "What was the percentage change in interest rate swaps from 2018 to 2019?"
  },
  {
    "input": "What is TORM's belief or estimation of the amount of net interest-bearing debt they possess?",
    "output": "What does TORM believe net interest-bearing debt to be?"
  },
  {
    "input": "What is the methodology used to calculate the net asset value (NAV) per share in investment funds?",
    "output": "How is NAV/share calculated?"
  },
  {
    "input": "What are the types of audit fees in the table?",
    "output": "What are the different types of audit fees listed in the table?"
  },
  {
    "input": "In what year did the auditor receive the highest overall compensation?",
    "output": "In which year was the total remuneration to the auditor the largest?"
  },
  {
    "input": "For which specific years were the calculations of Other Liabilities amounts conducted?",
    "output": "In which years were the amounts of Other Liabilities calculated for?"
  },
  {
    "input": "In which specific year did the amount allocated to Partners and commercial managements surpass the amount allocated in other years?",
    "output": "In which year was the amount under Partners and commercial managements larger?"
  },
  {
    "input": "\"What specific expenses are included in the category of operating expenses that are related to staff costs?\"",
    "output": "What are the staff costs included in operating expenses related to?"
  },
  {
    "input": "How is the average number of employees determined and what is the methodology behind its calculation?",
    "output": "How is the average number of employees calculated?"
  },
  {
    "input": "What was the change in the total number of permanent employees from 2018 to 2019?",
    "output": "How many permanent employees were added or lost between 2018 and 2019, resulting in the change in the total number of permanent employees?"
  },
  {
    "input": "What does note 21 include in terms of information?",
    "output": "What information does note 21 contain?"
  },
  {
    "input": "What does accounts receivables in the table include?",
    "output": "What specific components are included in the accounts receivables section of the table?"
  },
  {
    "input": "What specific items or costs are included in the income statement as Port expenses, including bunkers and commissions?",
    "output": "What is recognized in the income statement under Port expenses, bunkers and commissions?"
  },
  {
    "input": "How is allowance for expected credit loss of freight receivables calculated?",
    "output": "How is the calculation for allowance for expected credit loss determined specifically for freight receivables?"
  },
  {
    "input": "\"What specific components are accounted for in the carrying amount designated for 'Vessels and capitalized dry-docking'?\"",
    "output": "What is included in the carrying amount for \"Vessels and capitalized dry-docking\"?"
  },
  {
    "input": "What specific sub-elements, denoted in capital letters, can be found under the category of \"Vessels\" in the table related to dry-docking?",
    "output": "What are the sub-elements under Vessels and capitalized dry-docking in the table?"
  },
  {
    "input": "What specific components or sub-elements fall under the category of Vessels and capitalized dry-docking in the table?",
    "output": "What are the sub-elements under Vessels and capitalized dry-docking in the table?"
  },
  {
    "input": "\"What is the meaning of the phrase 'company's retirement obligations' and what specific aspects does it encompass?\"",
    "output": "What does the company's retirement obligations refer to?"
  },
  {
    "input": "\"What are the retirement obligations of the company and what do they encompass?\"",
    "output": "What does the company's retirement obligations refer to?"
  },
  {
    "input": "What is the company's total contractual cash obligations due in more than 5 years?",
    "output": "What is the total amount of cash that the company is obligated to pay according to its contracts, and when are these payments due, specifically for obligations that extend beyond a period of 5 years?"
  },
  {
    "input": "What is the company's total contractual cash obligations due in more than 5 years?",
    "output": "\"What is the total amount of cash that the company owes based on contractual agreements with a maturity period of more than 5 years?\""
  },
  {
    "input": "How much does operating leases account for total contractual cash obligations for period of less than 1 year?",
    "output": "What is the percentage of total contractual cash obligations attributable to operating leases with a duration of less than one year?"
  },
  {
    "input": "What is the specific monetary difference in the company's operating income between the years 2018 and 2019?",
    "output": "What is the company's increase in income from operating activities between 2018 and 2019? "
  },
  {
    "input": "What is the cash balance and the amount of cash equivalents held by the company as of August 31, 2019?",
    "output": "What is the company's cash and cash equivalents as at 31 August 2019?"
  },
  {
    "input": "What is the change in cash flow from investing activities from 2018 to 2019, specifically referring to any increase or decrease in the amount of cash generated or used in investing activities during this period?",
    "output": "What is the increase in cash flow from investing activities between 2018 and 2019?"
  },
  {
    "input": "What is the total cashflow from investing activities in both 2018 and 2019?",
    "output": "What is the combined amount of cash inflows and outflows from investing activities in 2018 and 2019?"
  },
  {
    "input": "What is the total cashflow from investing activities in both 2018 and 2019?",
    "output": "What was the net cash flow from investing activities in 2018 and 2019 combined?"
  },
  {
    "input": "What is the total effect of exchange rate changes on cash and cash equivalents in both 2018 and 2019?",
    "output": "What is the cumulative impact of fluctuations in exchange rates on the combined balances of cash and cash equivalents for the years 2018 and 2019?"
  },
  {
    "input": "When was the approval by Accenture's Board of Directors granted for the implementation of an open-market share purchase program specifically designed for the acquisition of Accenture's Class A ordinary shares?",
    "output": "When did Accenture's Board of Directors authorize and confirm an open-market share purchase program for acquiring Accenture pls Class A ordinary shares?"
  },
  {
    "input": "How much of the shares purchased in July were part of publicly announced plan or programs?",
    "output": "What proportion of the shares acquired in the month of July were specifically disclosed as part of publicly announced plan or programs?"
  },
  {
    "input": "How much of the shares purchased in July were part of publicly announced plan or programs?",
    "output": "What was the proportion of shares acquired in July that were part of publicly disclosed plans or programs?"
  },
  {
    "input": "What was the primary source of the company's operational revenue in the year 2019?",
    "output": "What is the company's main source of operating income in 2019?"
  },
  {
    "input": "What was the specific amount spent on expenses directly related to the production or provision of goods and services in the year 2019?",
    "output": "What was the cost of revenue in 2019?"
  },
  {
    "input": "What was the precise amount spent on expenses directly related to generating revenue in the year 2019?",
    "output": "What was the cost of revenue in 2019?"
  },
  {
    "input": "What was the total count of shares that were approved and allocated?",
    "output": "What was the number of granted shares?"
  },
  {
    "input": "How many shares were approved and given to individuals or entities as stocks?",
    "output": "What was the number of granted shares?"
  },
  {
    "input": "What is the disparity in the weighted-average grant date fair value between shares that have been allotted and subsequently forfeited?",
    "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?"
  },
  {
    "input": "What is the difference in the weighted-average grant date fair value between unvested shares and vested shares in 2018?",
    "output": "What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?"
  },
  {
    "input": "What is the discrepancy in the weighted-average grant date fair value between unvested shares and vested shares in the year 2018?",
    "output": "What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?"
  },
  {
    "input": "What was the previously reported amount of total current assets?",
    "output": "What was the total current assets as previously reported?"
  },
  {
    "input": "What is the total value of the deferred commissions, including the current portion, and the total current assets as reported in the previous financial statement?",
    "output": "What was the sum of deferred commissions, current portion and total current assets as previously reported?"
  },
  {
    "input": "What was the total amount reported for deferred commissions, the current portion of deferred commissions, and the overall total for current assets in the previous report?",
    "output": "What was the sum of deferred commissions, current portion and total current assets as previously reported?"
  },
  {
    "input": "What is the revised percentage of accrued expenses in relation to the total sum of liabilities and stockholders' equity?",
    "output": "What was accrued expenses as revised as a percentage of total liabilities and stockholders' equity?"
  },
  {
    "input": "What was the company's bad debt expense for the year ending December 31, 2018?",
    "output": "What was the exact amount recorded as bad debt expense by the company for the entire year ending on December 31, 2018?"
  },
  {
    "input": "What was the company's bad debt expense for the year ending December 31, 2018?",
    "output": "What was the amount of bad debts that the company recorded as an expense for the fiscal year that ended on December 31, 2018?"
  },
  {
    "input": "What was the total amount of interest earned as income in the year 2017?",
    "output": "What was interest income in 2017?"
  },
  {
    "input": "What was the net income of others in 2019?",
    "output": "What was others, net in 2019?"
  },
  {
    "input": "What was the percentage change in Interest expense between 2017 and 2018?",
    "output": "What was the percentage increase or decrease in Interest expense from 2017 to 2018?"
  },
  {
    "input": "What is the average interest income from 2017-2019?",
    "output": "What is the average income generated from interest for the years 2017 to 2019 inclusive?"
  },
  {
    "input": "What percentage increase or decrease in interest income occurred from 2018 to 2019?",
    "output": "What was the percentage change in interest income between 2018 and 2019?"
  },
  {
    "input": "What is the average amount of net cash provided by operating activities for the years 2017, 2018, and 2019?",
    "output": "What was the average net cash provided by operating activities from 2017-2019?"
  },
  {
    "input": "What was the percentage change in the free flow cash margin from 2017 to 2018?",
    "output": "What was the change in free flow cash margin between 2017 and 2018?"
  },
  {
    "input": "What was the total amount of federal net operating losses incurred by the company in the year 2019?",
    "output": "What was the company's federal net operating losses in 2019?"
  },
  {
    "input": "What was the amount of federal net operating losses incurred by the company during the year 2019 for tax purposes?",
    "output": "What was the company's federal net operating losses in 2019?"
  },
  {
    "input": "What was the company's state net operating losses in 2019?",
    "output": "What was the total amount of net operating losses incurred by the company in the state during the year 2019?"
  },
  {
    "input": "What is the percentage change in Deferred revenue between 2018 and 2019?",
    "output": "What is the exact percentage difference in Deferred revenue from 2018 to 2019?"
  },
  {
    "input": "What was the precise amount spent on sales and marketing in the year 2018?",
    "output": "What was the sales and marketing expense in 2018?"
  },
  {
    "input": "What was the specific fair value amount of cash that is being referred to in the context of a certain situation or event?",
    "output": "What was the fair value amount of cash?"
  },
  {
    "input": "What was the specific fair value figure assigned to the amount of cash? Please provide the determined fair value amount for the cash in question.",
    "output": "What was the fair value amount of cash?"
  },
  {
    "input": "What percentage of fair value total cash equivalents consist of agency bonds?",
    "output": "What is the proportion of agency bonds, in terms of percentage, in the total cash equivalents that make up the fair value?"
  },
  {
    "input": "What updates and advancements were made in computer equipment and software from 2018 to 2019?",
    "output": "What is the change in computer equipment and software between 2018 and 2019?"
  },
  {
    "input": "What are the differences in computer equipment and software between the years 2018 and 2019?",
    "output": "What is the change in computer equipment and software between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in revenue from research and development from 2017 to 2018?",
    "output": "What was the change in percentage of revenue of research and development between 2017 and 2018?"
  },
  {
    "input": "What are the main promises or obligations that the company is committed to?",
    "output": "What are the company's principal commitments?"
  },
  {
    "input": "What are the main commitments of the company?",
    "output": "What are the company's principal commitments?"
  },
  {
    "input": "What is the sum of total operating lease obligations and Long-term debt obligations including interest?",
    "output": "What is the combined amount of operating lease obligations and long-term debt obligations, including accrued interest?"
  },
  {
    "input": "What portion of the total obligations is attributable to payments that are due within a time frame of 1-3 years?",
    "output": "What is the percentage of the total obligations that consists of payments due in 1-3 years?"
  },
  {
    "input": "What is the specific percentage of the overall obligations that are categorized as payments to be made within a period of 1-3 years?",
    "output": "What is the percentage of the total obligations that consists of payments due in 1-3 years?"
  },
  {
    "input": "\"What are the regulations or guidelines regarding the payment of Foreign income tax applicable for the year 2019?\"",
    "output": "What is the provision for Foreign income tax in 2019?"
  },
  {
    "input": "\"What are the regulations or rules implemented in 2019 regarding the taxation of foreign income?\"",
    "output": "What is the provision for Foreign income tax in 2019?"
  },
  {
    "input": "What is the disparity in the weighted-average grant date fair value when comparing shares that have been granted to those that have been forfeited?",
    "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?"
  },
  {
    "input": "What is the disparity in weighted-average grant date fair value when comparing shares that have been granted with those that have been forfeited?",
    "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?"
  },
  {
    "input": "What is the  Net sales for 2019?",
    "output": "What was the total global revenue generated from sales in the year 2019?"
  },
  {
    "input": "What was the percentage change in diluted net income per share from continuing operations from 2017 to 2018?",
    "output": "What was the change in Diluted Net income per share from continuing operations in 2018 from 2017?"
  },
  {
    "input": "What was the percentage increase or decrease in the Diluted Net income per share from continuing operations between 2018 and 2017?",
    "output": "What was the percentage change in Diluted Net income per share from continuing operations in 2018 from 2017?"
  },
  {
    "input": "What does the Euro Term Loan due 2024 consist of?",
    "output": "What is included in the Euro Term Loan due in 2024?"
  },
  {
    "input": "What does the Euro Term Loan due 2024 consist of?",
    "output": "\"What are the components and details of the Euro Term Loan due in 2024?\""
  },
  {
    "input": "In which year did the value of the 1.0% State of Connecticut term loan, which has a maturity date in 2023, surpass its previous size?",
    "output": "In which year was the 1.0% State of Connecticut term loan due 2023 larger?"
  },
  {
    "input": "In which year did the size of the 1.0% State of Connecticut term loan due in 2023 increase?",
    "output": "In which year was the 1.0% State of Connecticut term loan due 2023 larger?"
  },
  {
    "input": "What was the change in Capital lease obligations from 2018 to 2019?",
    "output": "What was the exact difference in the amount of Capital lease obligations between the years 2018 and 2019?"
  },
  {
    "input": "In which year was ILS a higher percentage of total net sales?",
    "output": "In which specific year did the Incremental Lifetime Sales (ILS) constitute a larger proportion of the overall net sales?"
  },
  {
    "input": "What were the net sales figures for ILS in both 2018 and 2019, and what was the average amount for each year?",
    "output": "What was the average amount of net sales for ILS in 2018 and 2019?"
  },
  {
    "input": "What were the average net sales for ILS for the years 2018 and 2019?",
    "output": "What was the average amount of net sales for ILS in 2018 and 2019?"
  },
  {
    "input": "What is the estimated duration of a building and any associated enhancements before they are deemed no longer useful?",
    "output": "What is the useful life of Buildings and improvements?"
  },
  {
    "input": "How is property and equipment stated in the table?",
    "output": "In what manner is property and equipment presented and described within the table?"
  },
  {
    "input": "In which year was the amount of Land larger?",
    "output": "\"When comparing the years, which year had a larger amount of land?\""
  },
  {
    "input": "What was the change in Leasehold improvements from 2018 to 2019?",
    "output": "What was the difference in Leasehold improvements value between 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in Short-term investments in 2019 from 2018?",
    "output": "What was the percentage difference in the value of Short-term investments between 2019 and 2018?"
  },
  {
    "input": "What was the percentage change in Short-term investments in 2019 from 2018?",
    "output": "What was the exact percentage difference in the value of Short-term investments between 2018 and 2019?"
  },
  {
    "input": "What was the net income for the year 2019?",
    "output": "What was Net income in 2019?"
  },
  {
    "input": "\"In which specific year did the company report the highest amount of comprehensive income?\"",
    "output": "In which year was Comprehensive income largest?"
  },
  {
    "input": "What is the largest year for Comprehensive income?",
    "output": "In which year was Comprehensive income largest?"
  },
  {
    "input": "What was the difference in the amount of money earned in 2018 compared to 2017 after all expenses and taxes were accounted for?",
    "output": "What was the change in Net income in 2018 from 2017?"
  },
  {
    "input": "What was the Projected Benefit Obligation (PBO) amount for the year 2019? Please provide the numerical value for the PBO as of December 31st, 2019.",
    "output": "What was the Projected benefit obligation in 2019?"
  },
  {
    "input": "In which years was information on defined benefit plans provided?",
    "output": "During which specific years was information regarding defined benefit plans provided?"
  },
  {
    "input": "In which year was the amount of Research and development largest?",
    "output": "In which specific year did Research and development reach its highest amount or expenditure?"
  },
  {
    "input": "What was the specific amount allocated for income taxes in the year 2019?",
    "output": "What was the Provision for income taxes in 2019?"
  },
  {
    "input": "What specific amount was set aside for income taxes in the fiscal year of 2019?",
    "output": "What was the Provision for income taxes in 2019?"
  },
  {
    "input": "What has been the modification in the amount of state income taxes that individuals are required to pay in 2019 compared to 2018?",
    "output": "What was the change in state income taxes that is currently payable in 2019 from 2018?"
  },
  {
    "input": "How much Microelectronics was present in the year 2019?",
    "output": "What is the amount of Microelectronics in 2019?"
  },
  {
    "input": "What is the total volume or quantity of Microelectronics produced or used worldwide in the year 2019?",
    "output": "What is the amount of Microelectronics in 2019?"
  },
  {
    "input": "In which years is net sales calculated?",
    "output": "In which specific years does the calculation of net sales occur?"
  },
  {
    "input": "In which years is net sales calculated?",
    "output": "During which specific years is the calculation of net sales undertaken?"
  },
  {
    "input": "What was the percentage difference in the quantity of OEM components and instrumentation between 2018 and 2019?",
    "output": "What was the percentage change in the amount of OEM components and instrumentation in 2019 from 2018?"
  },
  {
    "input": "What was the Net cash provided by operating activities in 2019?",
    "output": "What was the amount of net cash generated from operating activities during the year 2019?"
  },
  {
    "input": "What was the  Purchases of property and equipment  in 2018?",
    "output": "What was the total value of purchases made for property and equipment in the year 2018?"
  },
  {
    "input": "In which specific years are the instances or applications of physical currency emphasized or showcased in the table?",
    "output": "In which years are the uses of cash highlighted in the table?"
  },
  {
    "input": "What years are specified in the table to illustrate the purpose of utilizing cash?",
    "output": "In which years are the uses of cash highlighted in the table?"
  },
  {
    "input": "In which year was the Issuance of shares under employee stock plans larger?",
    "output": "When comparing the issuance of shares under employee stock plans, in which specific year was the volume of shares issued found to be greater?"
  },
  {
    "input": "In which year were the Additions charged to expenses the largest?",
    "output": "What is the specific year when the largest Additions were charged to expenses?"
  },
  {
    "input": "In which year were the Additions charged to expenses the largest?",
    "output": "What was the year when the largest amount of Additions were charged to expenses?"
  },
  {
    "input": "In which year did tax fees have a higher value or amount than in other years?",
    "output": "In which year were tax fees larger?"
  },
  {
    "input": "What was the difference in the amount of all other fees charged in 2019 compared to 2018?",
    "output": "What was the change in All other fees in 2019 from 2018?"
  },
  {
    "input": "What was Long-term taxes payable in 2019?",
    "output": "What was the amount of Long-term taxes payable in the year 2019?"
  },
  {
    "input": "In which year was Other long-term liabilities larger?",
    "output": "\"In which specific year did Other long-term liabilities exhibit a greater amount compared to the other years?\""
  },
  {
    "input": "In 2019, what was the exact amount of deferred compensation liability?",
    "output": "What was the Total deferred compensation liability in 2019?"
  },
  {
    "input": "For which years was the calculation of Total deferred compensation liability performed?",
    "output": "In which years was Total deferred compensation liability calculated?"
  },
  {
    "input": "How was the calculation for diluted earnings per share derived?",
    "output": "How was diluted earnings per share computed?"
  },
  {
    "input": "In which specific years can we find the earnings per share data that was included in the table?",
    "output": "In which years was earnings per share provided in the table?"
  },
  {
    "input": "What year experienced the highest dilutive impact from employee stock awards?",
    "output": "In which year was the Dilutive effect of employee stock awards largest?"
  },
  {
    "input": "In what specific year did employee stock awards have the most significant dilutive effect?",
    "output": "In which year was the Dilutive effect of employee stock awards largest?"
  },
  {
    "input": "What was the change in Dilutive effect of employee stock awards in 2019 from 2018?",
    "output": "What was the difference in the dilutive impact caused by employee stock awards between 2019 and 2018?"
  },
  {
    "input": "What was the change in Dilutive effect of employee stock awards in 2019 from 2018?",
    "output": "What was the difference in how much employee stock awards had a dilutive effect in 2019 compared to 2018?"
  },
  {
    "input": "What was the percentage change in Dilutive effect of employee stock awards in 2019 from 2018?",
    "output": "What was the percentage change in the dilutive effect of employee stock awards from 2018 to 2019?"
  },
  {
    "input": "In which specific year did the Amortization of intangible assets account for a larger proportion of the overall net sales?",
    "output": "In which year was Amortization of intangible assets a higher percentage of total net sales?"
  },
  {
    "input": "What was the change in the amount of Amortization of intangible assets in 2019 from 2018?",
    "output": "What was the difference in the amount of Amortization of intangible assets between the fiscal years 2019 and 2018?"
  },
  {
    "input": "What was the value of Net deferred tax assets in the financial year of 2019?",
    "output": "What was the Net deferred tax assets in 2019?"
  },
  {
    "input": "What was the difference in the initial balance for 2019 compared to 2018?",
    "output": "What was the change in Beginning balance in 2019 from 2018?"
  },
  {
    "input": "What was the difference in the starting balance in 2019 compared to 2018?",
    "output": "What was the change in Beginning balance in 2019 from 2018?"
  },
  {
    "input": "What was the balance in the accounts as of the start of the year in 2018?",
    "output": "What was the  Balance as of the beginning of the year  in 2018?"
  },
  {
    "input": "\"What was the exact monetary balance in the accounts as of January 1st, 2018, at the start of the year?\"",
    "output": "What was the  Balance as of the beginning of the year  in 2018?"
  },
  {
    "input": "What was the percentage change in Balance as of the beginning of the year in 2019 from 2018?",
    "output": "What is the percentage change in the Balance as of the start of 2019 compared to the Balance at the start of 2018?"
  },
  {
    "input": "What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?",
    "output": "What was the percentage difference in the amount of income taxes that were prepaid and refundable between the years 2018 and 2019?"
  },
  {
    "input": "What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?",
    "output": "What was the percentage difference in the amount of prepaid and refundable income taxes between 2018 and 2019?"
  },
  {
    "input": "What does the Current portion of Euro Term Loan consist of?",
    "output": "What is included in the current portion of the Euro Term Loan?"
  },
  {
    "input": "What was the net change in the amount of outstanding Capital lease obligations between the years 2018 and 2019?",
    "output": "What was the change in Capital lease obligations from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in Capital lease obligations from 2018 to 2019?",
    "output": "What was the percentage change in Capital lease obligations from 2018 to 2019?"
  },
  {
    "input": "What was the percentage increase or decrease in Capital lease obligations between 2018 and 2019?",
    "output": "What was the percentage change in Capital lease obligations from 2018 to 2019?"
  },
  {
    "input": "What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?",
    "output": "What were the specific values of the company's work in process inventories as of March 31 in both 2018 and 2019?"
  },
  {
    "input": "What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?",
    "output": "What is the dollar amount of the work in process inventories for the company on March 31 of both 2018 and 2019?"
  },
  {
    "input": "What was the value of the company's finished goods on March 31, 2018 and March 31, 2019?",
    "output": "What are the respective values of the company's finished goods on March 31, 2018 and 2019?"
  },
  {
    "input": "What is the change in the company's raw materials on March 31, 2018 and 2019?",
    "output": "What is the difference in the amount of raw materials held by the company as of March 31, 2018 and March 31, 2019?"
  },
  {
    "input": "What was the mean value of the company's raw materials on March 31 in both 2018 and 2019?",
    "output": "What is the average value of the company's raw materials on March 31, 2018 and 2019?"
  },
  {
    "input": "What is the average worth of the company's raw materials specifically on March 31st, both in the years 2018 and 2019?",
    "output": "What is the average value of the company's raw materials on March 31, 2018 and 2019?"
  },
  {
    "input": "What is the mean amount of the company's completed products on March 31, in both the years 2018 and 2019?",
    "output": "What is the average value of the company's finished goods on March 31, 2018 and 2019?"
  },
  {
    "input": "What is the average value of the finished goods held by the company on March 31 in both 2018 and 2019?",
    "output": "What is the average value of the company's finished goods on March 31, 2018 and 2019?"
  },
  {
    "input": "What are the company's respective foreign income before taxes in 2018 and 2019?",
    "output": "What were the pre-tax foreign incomes of the company in 2018 and 2019?"
  },
  {
    "input": "What is the company's average loss before income taxes from the United States in 2018 and 2019?",
    "output": "\"What was the average pre-tax loss incurred by the company in the United States for the years 2018 and 2019?\""
  },
  {
    "input": "What is the company's average loss before income taxes from the United States in 2018 and 2019?",
    "output": "What was the average pre-tax income loss for the company in the United States for both 2018 and 2019?"
  },
  {
    "input": "What is the company's average foreign income before taxes in 2018 and 2019?",
    "output": "What was the average amount of foreign income before taxes for the company in both 2018 and 2019?"
  },
  {
    "input": "What are the average pre-tax earnings of the company for the fiscal years 2018 and 2019?",
    "output": "What is the company's average total income before income taxes in 2018 and 2019?"
  },
  {
    "input": "What are the specific cost of goods sold for the company in both 2018 and 2019?",
    "output": "What are the company's respective cost of goods sold in 2018 and 2019?"
  },
  {
    "input": "What were the cost of goods sold for the company in 2018 and 2019?",
    "output": "What are the company's respective cost of goods sold in 2018 and 2019?"
  }
]


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py
================================================
from ktem.llms.manager import llms

from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage
from kotaemon.llms import ChatLLM, PromptTemplate

# Default prompt used to rephrase a user question. It carries two placeholders
# that RewriteQuestionPipeline.run fills in: {lang} (language to answer in) and
# {question} (the original user question).
DEFAULT_REWRITE_PROMPT = (
    "Given the following question, rephrase and expand it "
    "to help you do better answering. Maintain all information "
    "in the original question. Keep the question as concise as possible. "
    "Only output the rephrased question without additional information. "
    "Give answer in {lang}\n"
    "Original question: {question}\n"
    "Rephrased question: "
)


class RewriteQuestionPipeline(BaseComponent):
    """Rephrase a user question with an LLM

    Args:
        llm: the chat model asked to rephrase the question
        rewrite_template: the prompt template, populated with `question` and `lang`
        lang: the language the model is asked to answer in
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    rewrite_template: str = DEFAULT_REWRITE_PROMPT

    lang: str = "English"

    def run(self, question: str) -> Document:  # type: ignore
        """Populate the template with `question` and return the LLM output."""
        filled_prompt = PromptTemplate(self.rewrite_template).populate(
            question=question, lang=self.lang
        )
        system_message = SystemMessage(content="You are a helpful assistant")
        user_message = HumanMessage(content=filled_prompt)
        return self.llm([system_message, user_message])


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/suggest_conversation_name.py
================================================
import logging

from ktem.llms.manager import llms

from kotaemon.base import AIMessage, BaseComponent, Document, HumanMessage, Node
from kotaemon.llms import ChatLLM, PromptTemplate

logger = logging.getLogger(__name__)


class SuggestConvNamePipeline(BaseComponent):
    """Ask an LLM to propose a conversation name from the chat history."""

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    SUGGEST_NAME_PROMPT_TEMPLATE = (
        "You are an expert at suggesting good and memorable conversation name. "
        "Based on the chat history above, "
        "suggest a good conversation name (max 10 words). "
        "Give answer in {lang}. Just output the conversation "
        "name without any extra."
    )
    prompt_template: str = SUGGEST_NAME_PROMPT_TEMPLATE
    lang: str = "English"

    def run(self, chat_history: list[tuple[str, str]]) -> Document:  # type: ignore
        """Replay the history as chat messages, then ask for a name.

        Args:
            chat_history: `(human, ai)` text pairs, in conversation order.

        Returns:
            Whatever the LLM returns for the assembled message list.
        """
        instruction = PromptTemplate(self.prompt_template).populate(lang=self.lang)

        # Every turn contributes a human message followed by the AI reply; the
        # naming instruction goes last so that "above" refers to the history.
        messages = []
        for user_text, bot_text in chat_history:
            messages.extend(
                (HumanMessage(content=user_text), AIMessage(content=bot_text))
            )
        messages.append(HumanMessage(content=instruction))

        return self.llm(messages)


================================================
FILE: libs/ktem/ktem/reasoning/prompt_optimization/suggest_followup_chat.py
================================================
import logging

from ktem.llms.manager import llms

from kotaemon.base import AIMessage, BaseComponent, Document, HumanMessage, Node
from kotaemon.llms import ChatLLM, PromptTemplate

logger = logging.getLogger(__name__)


class SuggestFollowupQuesPipeline(BaseComponent):
    """Ask an LLM for follow-up questions given the recent chat history."""

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    SUGGEST_QUESTIONS_PROMPT_TEMPLATE = (
        "Based on the chat history above. "
        "your task is to generate 3 to 5 relevant follow-up questions. "
        "These questions should be simple, very concise, "
        "and designed to guide the conversation further. "
        "Respond in JSON format with 'questions' key. "
        "Answer using the language {lang} same as the question. "
    )
    prompt_template: str = SUGGEST_QUESTIONS_PROMPT_TEMPLATE
    # Appended verbatim after the populated template (it is never passed through
    # PromptTemplate, so its literal braces are left untouched).
    extra_prompt: str = """Example of valid response:
```json
{
    "questions": ["the weather is good", "what's your favorite city"]
}
```"""
    lang: str = "English"

    def run(self, chat_history: list[tuple[str, str]]) -> Document:
        """Build the message list from the last three turns and query the LLM.

        Args:
            chat_history: `(human, ai)` text pairs, in conversation order.

        Returns:
            Whatever the LLM returns for the assembled message list.
        """
        populated = PromptTemplate(self.prompt_template).populate(lang=self.lang)
        instruction = populated + self.extra_prompt

        # Only the three most recent turns are replayed to the model.
        recent_turns = chat_history[-3:]
        messages = []
        for user_text, bot_text in recent_turns:
            messages.extend(
                (HumanMessage(content=user_text), AIMessage(content=bot_text))
            )
        messages.append(HumanMessage(content=instruction))

        return self.llm(messages)


================================================
FILE: libs/ktem/ktem/reasoning/react.py
================================================
import html
import logging
from typing import AnyStr, Optional, Type

from ktem.llms.manager import llms
from ktem.mcp.manager import mcp_manager
from ktem.reasoning.base import BaseReasoning
from ktem.utils.generator import Generator
from ktem.utils.render import Render
from langchain.text_splitter import CharacterTextSplitter
from pydantic import BaseModel, Field

from kotaemon.agents import (
    BaseTool,
    GoogleSearchTool,
    LLMTool,
    ReactAgent,
    WikipediaTool,
)
from kotaemon.agents.tools.mcp import create_tools_from_config
from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage
from kotaemon.llms import ChatLLM, PromptTemplate

from ..utils import SUPPORTED_LANGUAGE_MAP

# Module-level logger for the ReAct reasoning module.
logger = logging.getLogger(__name__)
# Divisor applied to the "reasoning.max_context_length" setting to derive the
# agent's max_context_length in ReactAgentPipeline.get_pipeline.
DEFAULT_AGENT_STEPS = 4


# Argument schema referenced by DocSearchTool.args_schema: one required
# free-text search query.
class DocSearchArgs(BaseModel):
    query: str = Field(..., description="a search query as input to the doc search")


class DocSearchTool(BaseTool):
    # Agent tool that runs the configured retrievers on a query and returns the
    # retrieved items merged into a single HTML-flavoured evidence string.
    name: str = "docsearch"
    description: str = (
        "A storage that contains internal documents. If you lack any specific "
        "private information to answer the question, you can search in this "
        "document storage. Furthermore, if you are unsure about which document that "
        "the user refers to, likely the user already selects the target document in "
        "this document storage, you just need to do normal search. If possible, "
        "formulate the search query as specific as possible."
    )
    args_schema: Optional[Type[BaseModel]] = DocSearchArgs
    retrievers: list[BaseComponent] = []

    def _run_tool(self, query: AnyStr) -> AnyStr:
        """Query every retriever and format the unique results as evidence.

        Args:
            query: the search query, passed to each retriever as `text`.

        Returns:
            The value of `prepare_evidence` for the de-duplicated documents.
            NOTE(review): that value is a `Document`, not a string as the
            annotation suggests — confirm before tightening the annotation.
        """
        docs = []
        # A set keeps the "already seen" check O(1) per retrieved document.
        seen_ids = set()
        for retriever in self.retrievers:
            for doc in retriever(text=query):
                if doc.doc_id in seen_ids:
                    continue
                seen_ids.add(doc.doc_id)
                docs.append(doc)

        return self.prepare_evidence(docs)

    def prepare_evidence(self, docs, trim_len: int = 4000):
        """Merge retrieved documents into one evidence string and trim it.

        Each document is rendered according to `metadata["type"]`:
        "table" (at most 5, taken from `table_origin`), "chatbot" (taken from
        `window`), "image" (HTML-escaped caption only) or plain text otherwise
        (`window` if present, else `text`, with newlines flattened to spaces).
        Table and plain-text content already contained in the evidence is not
        added again.

        Args:
            docs: the retrieved documents, in ranking order.
            trim_len: chunk size given to the tiktoken-based splitter; only the
                first resulting chunk is kept.

        Returns:
            A Document whose content is the (possibly trimmed) evidence.
        """
        evidence = ""
        table_found = 0

        for _id, retrieved_item in enumerate(docs):
            metadata = retrieved_item.metadata
            item_type = metadata.get("type", "")
            retrieved_content = ""
            page = metadata.get("page_label", None)
            source = filename = metadata.get("file_name", "-")
            if page:
                source += f" (Page {page})"

            if item_type == "table":
                if table_found < 5:
                    retrieved_content = metadata.get("table_origin", "")
                    if retrieved_content not in evidence:
                        table_found += 1
                        evidence += (
                            f"<br><b>Table from {source}</b>\n"
                            + retrieved_content
                            + "\n<br>"
                        )
            elif item_type == "chatbot":
                retrieved_content = metadata["window"]
                evidence += (
                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
                    + retrieved_content
                    + "\n<br>"
                )
            elif item_type == "image":
                # The image payload is only kept for the debug print below; the
                # evidence itself carries the escaped caption.
                retrieved_content = metadata.get("image_origin", "")
                retrieved_caption = html.escape(retrieved_item.get_content())
                evidence += (
                    f"<br><b>Figure from {source}</b>\n" + retrieved_caption + "\n<br>"
                )
            else:
                if "window" in metadata:
                    retrieved_content = metadata["window"]
                else:
                    retrieved_content = retrieved_item.text
                retrieved_content = retrieved_content.replace("\n", " ")
                if retrieved_content not in evidence:
                    evidence += (
                        f"<br><b>Content from {source}: </b> "
                        + retrieved_content
                        + " \n<br>"
                    )

            print("Retrieved #{}: {}".format(_id, retrieved_content[:100]))
            print("Score", metadata.get("reranking_score", None))

        # trim context by trim_len
        if evidence:
            text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
                chunk_size=trim_len,
                chunk_overlap=0,
                separator=" ",
                model_name="gpt-3.5-turbo",
            )
            texts = text_splitter.split_text(evidence)
            evidence = texts[0]

        return Document(content=evidence)


# Built-in tools keyed by the names offered in the "tools" user setting.
# NOTE(review): the instances are created once at import time, and
# ReactAgentPipeline.get_pipeline assigns `retrievers` / `llm` directly onto
# them, so that state is shared by every pipeline built from this registry —
# confirm this sharing is intended.
TOOL_REGISTRY = {
    "Google": GoogleSearchTool(),
    "Wikipedia": WikipediaTool(),
    "LLM": LLMTool(),
    "SearchDoc": DocSearchTool(),
}

# Default value of the "QA Prompt" user setting; get_pipeline wraps the chosen
# prompt in a PromptTemplate and assigns it to the agent.
# Placeholders: {lang}, {tool_description}, {tool_names}, {instruction} and
# {agent_scratchpad}. They are presumably populated by the ReAct agent itself
# (not visible in this module) — verify before renaming any of them.
DEFAULT_QA_PROMPT = (
    "Answer the following questions as best you can. Give answer in {lang}. "
    "You have access to the following tools:\n"
    "{tool_description}\n"
    "Use the following format:\n\n"
    "Question: the input question you must answer\n"
    "Thought: you should always think about what to do\n\n"
    "Action: the action to take, should be one of [{tool_names}]\n\n"
    "Action Input: the input to the action, should be different from the action input "
    "of the same action in previous steps.\n\n"
    "Observation: the result of the action\n\n"
    "... (this Thought/Action/Action Input/Observation can repeat N times)\n"
    "#Thought: I now know the final answer\n"
    "Final Answer: the final answer to the original input question\n\n"
    "Begin! After each Action Input.\n\n"
    "Question: {instruction}\n"
    "Thought: {agent_scratchpad}\n"
)

# Default prompt used to rephrase the user question before it reaches the
# agent. RewriteQuestionPipeline.run fills in {lang} and {question}.
DEFAULT_REWRITE_PROMPT = (
    "Given the following question, rephrase and expand it "
    "to help you do better answering. Maintain all information "
    "in the original question. Keep the question as concise as possible. "
    "Give answer in {lang}\n"
    "Original question: {question}\n"
    "Rephrased question: "
)


class RewriteQuestionPipeline(BaseComponent):
    """Rephrase a user question with an LLM

    Args:
        llm: the chat model asked to rephrase the question
        rewrite_template: the prompt template, populated with `question` and `lang`
        lang: the language the model is asked to answer in
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    rewrite_template: str = DEFAULT_REWRITE_PROMPT

    lang: str = "English"

    def run(self, question: str) -> Document:  # type: ignore
        """Populate the template with `question` and return the LLM output."""
        template = PromptTemplate(self.rewrite_template)
        user_prompt = template.populate(question=question, lang=self.lang)
        conversation = [SystemMessage(content="You are a helpful assistant")]
        conversation.append(HumanMessage(content=user_prompt))
        return self.llm(conversation)


class ReactAgentPipeline(BaseReasoning):
    """Question answering pipeline using ReAct agent.

    Fields:
        retrievers: retrievers handed to the "SearchDoc" tool
        agent: the ReAct agent that answers the (possibly rewritten) question
        rewrite_pipeline: question rewriter, only used when `use_rewrite` is set
        use_rewrite: whether to rewrite the question before running the agent
    """

    class Config:
        allow_extra = True

    retrievers: list[BaseComponent]
    agent: ReactAgent = ReactAgent.withx()
    rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx()
    use_rewrite: bool = False

    def prepare_citation(self, step_id, step, output, status) -> Document:
        """Render one agent step as a collapsible block for the info panel.

        Args:
            step_id: 1-based position of the step, shown in the header.
            step: the agent step; `log`, `tool` and `tool_input` are read.
            output: the tool output of the step.
            status: agent status; tool, input and output are only shown when it
                is "thinking", otherwise the output reads "Finished".

        Returns:
            A Document on the "info" channel.
        """
        header = "<b>Step {id}</b>: {log}".format(id=step_id, log=step.log)

        if status == "thinking":
            tool = step.tool
            # newlines and double quotes are stripped from the displayed input
            tool_input = step.tool_input.replace("\n", "").replace('"', "")
            shown_output = output
        else:
            tool, tool_input, shown_output = "", "", "Finished"

        content = (
            "<b>Action</b>: <em>{tool}[{input}]</em>\n\n<b>Output</b>: {output}"
        ).format(tool=tool, input=tool_input, output=shown_output)

        return Document(
            channel="info",
            content=Render.collapsible(
                header=header,
                content=Render.table(content),
                open=True,
            ),
        )

    async def ainvoke(  # type: ignore
        self, message, conv_id: str, history: list, **kwargs  # type: ignore
    ) -> Document:
        """Answer `message` in one shot, reporting output through `report_output`.

        The answer text is reported on the "chat" channel, every intermediate
        step output on the "info" channel, and `None` is reported last.

        Returns:
            The agent's answer.
        """
        if self.use_rewrite:
            # The rewrite pipeline is called synchronously (`stream` reads
            # `.text` straight from its result), so the result is not awaited.
            rewrite = self.rewrite_pipeline(question=message)
            message = rewrite.text

        answer = self.agent(message)
        self.report_output(Document(content=answer.text, channel="chat"))

        intermediate_steps = answer.intermediate_steps
        for _, step_output in intermediate_steps:
            self.report_output(Document(content=step_output, channel="info"))

        self.report_output(None)
        return answer

    def stream(self, message, conv_id: str, history: list, **kwargs):
        """Answer `message`, yielding Documents as the agent progresses.

        Yields an "info" Document per agent step; for items whose status is not
        "thinking" a "chat" Document with the item text is yielded first. When
        rewriting is enabled, the rewritten question is announced up front.

        Returns:
            The return value captured from the agent's stream.
        """
        if self.use_rewrite:
            rewrite = self.rewrite_pipeline(question=message)
            message = rewrite.text
            yield Document(
                channel="info",
                content=f"Rewrote the message to: {rewrite.text}",
            )

        output_stream = Generator(self.agent.stream(message))
        for idx, item in enumerate(output_stream, start=1):
            if item.status != "thinking":
                yield Document(
                    channel="chat",
                    content=item.text,
                )
            step, step_output = item.intermediate_steps
            yield Document(
                channel="info",
                content=self.prepare_citation(idx, step, step_output, item.status),
            )

        return output_stream.value

    @classmethod
    def get_pipeline(
        cls, settings: dict, states: dict, retrievers: list | None = None
    ) -> BaseReasoning:
        """Build a pipeline configured from the user settings.

        Args:
            settings: flat settings dict; keys under
                "reasoning.options.<id>." plus "reasoning.lang" and the optional
                "reasoning.max_context_length" are read.
            states: application state; `states["app"]["regen"]` enables
                question rewriting.
            retrievers: retrievers for the pipeline and the "SearchDoc" tool.

        Returns:
            The configured pipeline.
        """
        _id = cls.get_info()["id"]
        prefix = f"reasoning.options.{_id}"

        llm_name = settings[f"{prefix}.llm"]
        llm = llms.get(llm_name, llms.get_default())

        max_context_length_setting = settings.get("reasoning.max_context_length", None)

        pipeline = ReactAgentPipeline(retrievers=retrievers)
        pipeline.agent.llm = llm
        pipeline.agent.max_iterations = settings[f"{prefix}.max_iterations"]

        if max_context_length_setting:
            pipeline.agent.max_context_length = (
                max_context_length_setting // DEFAULT_AGENT_STEPS
            )

        tools = []
        for tool_name in settings[f"{prefix}.tools"]:
            if tool_name.startswith("[MCP] "):
                server_name = tool_name[len("[MCP] ") :]
                entry = mcp_manager.get(server_name)
                if entry:
                    # Pop from a copy: removing "enabled_tools" from the dict
                    # returned by the manager could drop the filter for every
                    # pipeline built afterwards.
                    config = dict(entry["config"])
                    enabled_tools = config.pop("enabled_tools", None)
                    mcp_tools = create_tools_from_config(config, enabled_tools)
                    tools.extend(mcp_tools)
            else:
                tool = TOOL_REGISTRY[tool_name]
                if tool_name == "SearchDoc":
                    tool.retrievers = retrievers
                elif tool_name == "LLM":
                    tool.llm = llm
                tools.append(tool)
        pipeline.agent.plugins = tools
        pipeline.agent.output_lang = SUPPORTED_LANGUAGE_MAP.get(
            settings["reasoning.lang"], "English"
        )
        pipeline.use_rewrite = states.get("app", {}).get("regen", False)
        pipeline.agent.prompt_template = PromptTemplate(settings[f"{prefix}.qa_prompt"])

        return pipeline

    @classmethod
    def get_user_settings(cls) -> dict:
        """Describe the user-facing settings of this reasoning option.

        LLM and MCP tool choices are looked up at call time; a failure in
        either lookup is logged and the remaining choices are still returned.
        """
        llm = ""
        llm_choices = [("(default)", "")]
        try:
            llm_choices += [(name, name) for name in llms.options().keys()]
        except Exception as e:
            logger.exception(f"Failed to get LLM options: {e}")

        tool_choices = ["Wikipedia", "Google", "LLM", "SearchDoc"]
        try:
            tool_choices += mcp_manager.get_enabled_tools()
        except Exception as e:
            logger.exception(f"Failed to get MCP tool options: {e}")

        return {
            "llm": {
                "name": "Language model",
                "value": llm,
                "component": "dropdown",
                "choices": llm_choices,
                "special_type": "llm",
                "info": (
                    "The language model to use for generating the answer. If None, "
                    "the application default language model will be used."
                ),
            },
            "tools": {
                "name": "Tools for knowledge retrieval",
                "value": ["SearchDoc", "LLM"],
                "component": "checkboxgroup",
                "choices": tool_choices,
            },
            "max_iterations": {
                "name": "Maximum number of iterations the LLM can go through",
                "value": 5,
                "component": "number",
            },
            "qa_prompt": {
                "name": "QA Prompt",
                "value": DEFAULT_QA_PROMPT,
            },
        }

    @classmethod
    def get_info(cls) -> dict:
        """Return the id, display name and description of this reasoning option."""
        return {
            "id": "ReAct",
            "name": "ReAct Agent",
            "description": (
                "Implementing ReAct paradigm: https://arxiv.org/abs/2210.03629. "
                "ReAct agent answers the user's request by iteratively formulating "
                "plan and executing it. The agent can use multiple tools to gather "
                "information and generate the final answer."
            ),
        }


================================================
FILE: libs/ktem/ktem/reasoning/rewoo.py
================================================
import html
import logging
from difflib import SequenceMatcher
from typing import AnyStr, Generator, Optional, Type

from ktem.llms.manager import llms
from ktem.mcp.manager import mcp_manager
from ktem.reasoning.base import BaseReasoning
from ktem.utils.generator import Generator as GeneratorWrapper
from ktem.utils.render import Render
from langchain.text_splitter import CharacterTextSplitter
from pydantic import BaseModel, Field

from kotaemon.agents import (
    BaseTool,
    GoogleSearchTool,
    LLMTool,
    RewooAgent,
    WikipediaTool,
)
from kotaemon.agents.tools.mcp import create_tools_from_config
from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage
from kotaemon.llms import ChatLLM, PromptTemplate

from ..utils import SUPPORTED_LANGUAGE_MAP

# Module-level logger for the ReWOO reasoning module.
logger = logging.getLogger(__name__)
# NOTE(review): presumably used to split the global max context length across
# agent steps, as in the ReAct module — its use is not visible here; confirm.
DEFAULT_AGENT_STEPS = 4


# Default planner prompt: asks the model for a numbered list of plans (#PlanN),
# each paired with one tool call whose result is stored as evidence #EN.
# Placeholders: {tool_description} and {task}.
# NOTE(review): the code that populates this template is not visible in this
# part of the module — verify before renaming a placeholder.
DEFAULT_PLANNER_PROMPT = (
    "You are an AI agent who makes step-by-step plans to solve a problem under the "
    "help of external tools. For each step, make one plan followed by one tool-call, "
    "which will be executed later to retrieve evidence for that step.\n"
    "You should store each evidence into a distinct variable #E1, #E2, #E3 ... that "
    "can be referred to in later tool-call inputs.\n\n"
    "##Available Tools##\n"
    "{tool_description}\n\n"
    "##Output Format (Replace '<...>')##\n"
    "#Plan1: <describe your plan here>\n"
    "#E1: <toolname>[<input here>] (eg. Search[What is Python])\n"
    "#Plan2: <describe next plan>\n"
    "#E2: <toolname>[<input here, you can use #E1 to represent its expected output>]\n"
    "And so on...\n\n"
    "##Your Task##\n"
    "{task}\n\n"
    "##Now Begin##\n"
)

# Default solver prompt: asks the model to summarize each plan/evidence pair
# and draw a final conclusion for the task.
# Placeholders: {lang}, {plan_evidence} and {task}.
# NOTE(review): the code that populates this template is not visible in this
# part of the module — verify before renaming a placeholder.
DEFAULT_SOLVER_PROMPT = (
    "You are an AI agent who solves a problem with my assistance. I will provide "
    "step-by-step plans(#Plan) and evidences(#E) that could be helpful.\n"
    "Your task is to briefly summarize each step, then make a short final conclusion "
    "for your task. Give answer in {lang}.\n\n"
    "##My Plans and Evidences##\n"
    "{plan_evidence}\n\n"
    "##Example Output##\n"
    "First, I <did something> , and I think <...>; Second, I <...>, "
    "and I think <...>; ....\n"
    "So, <your conclusion>.\n\n"
    "##Your Task##\n"
    "{task}\n\n"
    "##Now Begin##\n"
)


# Argument schema referenced by DocSearchTool.args_schema: one required
# free-text search query.
class DocSearchArgs(BaseModel):
    query: str = Field(..., description="a search query as input to the doc search")


class DocSearchTool(BaseTool):
    # Agent tool that runs the configured retrievers on a query and returns the
    # retrieved items merged into a single HTML-flavoured evidence string.
    name: str = "docsearch"
    description: str = (
        "A storage that contains internal documents. If you lack any specific "
        "private information to answer the question, you can search in this "
        "document storage. Furthermore, if you are unsure about which document that "
        "the user refers to, likely the user already selects the target document in "
        "this document storage, you just need to do normal search. If possible, "
        "formulate the search query as specific as possible."
    )
    args_schema: Optional[Type[BaseModel]] = DocSearchArgs
    retrievers: list[BaseComponent] = []

    def _run_tool(self, query: AnyStr) -> AnyStr:
        """Query every retriever and format the unique results as evidence.

        Args:
            query: the search query, passed to each retriever as `text`.

        Returns:
            The value of `prepare_evidence` for the de-duplicated documents.
            NOTE(review): that value is a `Document`, not a string as the
            annotation suggests — confirm before tightening the annotation.
        """
        docs = []
        # A set keeps the "already seen" check O(1) per retrieved document.
        seen_ids = set()
        for retriever in self.retrievers:
            for doc in retriever(text=query):
                if doc.doc_id in seen_ids:
                    continue
                seen_ids.add(doc.doc_id)
                docs.append(doc)

        return self.prepare_evidence(docs)

    def prepare_evidence(self, docs, trim_len: int = 3000):
        """Merge retrieved documents into one evidence string and trim it.

        Each document is rendered according to `metadata["type"]`:
        "table" (at most 5, taken from `table_origin`), "chatbot" (taken from
        `window`), "image" (HTML-escaped caption only) or plain text otherwise
        (`window` if present, else `text`, with newlines flattened to spaces).
        Table and plain-text content already contained in the evidence is not
        added again.

        Args:
            docs: the retrieved documents, in ranking order.
            trim_len: chunk size given to the tiktoken-based splitter; only the
                first resulting chunk is kept.

        Returns:
            A Document whose content is the (possibly trimmed) evidence.
        """
        evidence = ""
        table_found = 0

        for _id, retrieved_item in enumerate(docs):
            metadata = retrieved_item.metadata
            item_type = metadata.get("type", "")
            retrieved_content = ""
            page = metadata.get("page_label", None)
            source = filename = metadata.get("file_name", "-")
            if page:
                source += f" (Page {page})"

            if item_type == "table":
                if table_found < 5:
                    retrieved_content = metadata.get("table_origin", "")
                    if retrieved_content not in evidence:
                        table_found += 1
                        evidence += (
                            f"<br><b>Table from {source}</b>\n"
                            + retrieved_content
                            + "\n<br>"
                        )
            elif item_type == "chatbot":
                retrieved_content = metadata["window"]
                evidence += (
                    f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
                    + retrieved_content
                    + "\n<br>"
                )
            elif item_type == "image":
                retrieved_content = metadata.get("image_origin", "")
                retrieved_caption = html.escape(retrieved_item.get_content())
                # PWS doesn't support VLM for images, we will just store the caption
                evidence += (
                    f"<br><b>Figure from {source}</b>\n" + retrieved_caption + "\n<br>"
                )
            else:
                if "window" in metadata:
                    retrieved_content = metadata["window"]
                else:
                    retrieved_content = retrieved_item.text
                retrieved_content = retrieved_content.replace("\n", " ")
                if retrieved_content not in evidence:
                    evidence += (
                        f"<br><b>Content from {source}: </b> "
                        + retrieved_content
                        + " \n<br>"
                    )

            # Only a short preview is printed: the content may be a whole table
            # or an image payload, which would otherwise flood the console.
            print("Retrieved #{}: {}".format(_id, retrieved_content[:100]))
            print("Score", metadata.get("reranking_score", None))

        # trim context by trim_len
        if evidence:
            text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
                chunk_size=trim_len,
                chunk_overlap=0,
                separator=" ",
                model_name="gpt-3.5-turbo",
            )
            texts = text_splitter.split_text(evidence)
            evidence = texts[0]

        return Document(content=evidence)


# Built-in tools keyed by name. The instances are created once, when this
# module is imported, so every lookup returns the same tool object.
TOOL_REGISTRY = {
    "Google": GoogleSearchTool(),
    "Wikipedia": WikipediaTool(),
    "LLM": LLMTool(),
    "SearchDoc": DocSearchTool(),
}

# Default prompt used to rephrase the user question before it reaches the
# agent. RewriteQuestionPipeline.run fills in {lang} and {question}.
DEFAULT_REWRITE_PROMPT = (
    "Given the following question, rephrase and expand it "
    "to help you do better answering. Maintain all information "
    "in the original question. Keep the question as concise as possible. "
    "Give answer in {lang}\n"
    "Original question: {question}\n"
    "Rephrased question: "
)


class RewriteQuestionPipeline(BaseComponent):
    """Rephrase a user question with an LLM

    Args:
        llm: the chat model asked to rephrase the question
        rewrite_template: the prompt template, populated with `question` and `lang`
        lang: the language the model is asked to answer in
    """

    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
    rewrite_template: str = DEFAULT_REWRITE_PROMPT

    lang: str = "English"

    def run(self, question: str) -> Document:  # type: ignore
        """Populate the template with `question` and return the LLM output."""
        template = PromptTemplate(self.rewrite_template)
        request = HumanMessage(
            content=template.populate(question=question, lang=self.lang)
        )
        persona = SystemMessage(content="You are a helpful assistant")
        return self.llm([persona, request])


def find_text(llm_output, context):
    """Locate each line of `llm_output` inside `context`.

    For every newline-separated line, the longest contiguous block it shares
    with `context` is computed with difflib.SequenceMatcher (autojunk off).

    Args:
        llm_output: text whose lines are searched for.
        context: text in which the lines are searched.

    Returns:
        One `(start, end)` tuple per line: the half-open character span of the
        longest match inside `context`. A line sharing nothing with `context`
        yields a zero-length span.
    """

    def longest_span(line):
        matcher = SequenceMatcher(None, line, context, autojunk=False)
        hit = matcher.find_longest_match()
        return (hit.b, hit.b + hit.size)

    return [longest_span(line) for line in llm_output.split("\n")]


class RewooAgentPipeline(BaseReasoning):
    """Question answering pipeline using ReWOO Agent."""

    class Config:
        allow_extra = True

    retrievers: list[BaseComponent]
    agent: RewooAgent = RewooAgent.withx()
    rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx()
    use_rewrite: bool = False
    enable_citation: bool = False

    def format_info_panel_evidence(self, worker_log):
        """Turn one worker log into a collapsed entry for the info panel.

        The last line starting with ``#Plan`` is used as the entry header and
        is left out of the body. Other lines starting with ``#`` are prefixed
        with a backslash so markdown does not render them as headers;
        ``#Action`` lines additionally get a trailing ``<br>``.

        Returns:
            A Document on the "info" channel, or None when the log contains no
            ``#Plan`` line.
        """
        plan_line = ""
        body_lines = []

        for raw in worker_log.splitlines():
            if raw.startswith("#Plan"):
                plan_line = raw
                continue

            if raw.startswith("#"):
                escaped = "\\" + raw
                if raw.startswith("#Action"):
                    # small fix for markdown output
                    escaped += "<br>"
                body_lines.append(escaped)
            else:
                body_lines.append(raw)

        if not plan_line:
            return None

        table = Render.table("\n".join(body_lines))
        panel = Render.collapsible(header=plan_line, content=table, open=False)
        return Document(channel="info", content=panel)

    def format_info_panel_planner(self, planner_output):
        """Wrap the planner text in an expanded "Planner Output" panel entry.

        Newlines are converted to ``<br>`` before rendering.
        """
        html_body = "<br>".join(planner_output.split("\n"))
        panel = Render.collapsible(
            header="Planner Output",
            content=html_body,
            open=True,
        )
        return Document(channel="info", content=panel)

    def prepare_citation(self, answer) -> list[Document]:
        """Prepare citation to show on the UI

        The worker log stored in ``answer.metadata["worker_log"]`` is split
        into one collapsible segment per ``#Plan`` line. When
        ``answer.metadata["citation"]`` is set, the parts of the log matched
        by its evidences (located with ``find_text``) are wrapped with
        ``Render.highlight`` before the split.

        Args:
            answer: agent output whose metadata holds "worker_log" and,
                optionally, "citation".

        Returns:
            Documents on the "info" channel, one per segment.
        """
        worker_log = answer.metadata["worker_log"]
        citation = answer.metadata.get("citation")

        if citation is not None:
            boundaries = {0}
            highlight_starts = set()
            for evidence in citation.evidences:
                for start, end in find_text(evidence, worker_log):
                    if end <= start:
                        # zero-length match (e.g. a blank evidence line): it
                        # would register offset 0 as a highlight start and
                        # highlight text that was never matched
                        continue
                    boundaries.update((start, end))
                    highlight_starts.add(start)

            ordered = sorted(boundaries)
            pieces = []
            # piece i covers ordered[i]..ordered[i + 1]; the last one runs to
            # the end of the log
            for pos, begin in enumerate(ordered):
                stop = ordered[pos + 1] if pos + 1 < len(ordered) else None
                span = worker_log[begin:stop]
                if begin in highlight_starts:
                    span = Render.highlight(span)
                pieces.append(span)
            text = "".join(pieces)
        else:
            text = worker_log

        # separate text by detect header: #Plan
        segments: list[list[str]] = []
        for line in text.splitlines():
            if line.startswith("#Plan"):
                # line starts with #Plan should be marked as a new segment
                segments.append([line])
                continue

            if line.startswith("#Action"):
                # small fix for markdown output
                line = "\\" + line + "<br>"
            elif line.startswith("#"):
                # stop markdown from rendering big headers
                line = "\\" + line

            if segments:
                segments[-1].append(line)
            else:
                # no #Plan seen yet: open a segment instead of indexing into
                # an empty list
                segments.append([line])

        return [
            Document(
                channel="info",
                content=Render.collapsible(
                    header=segment[0],
                    content=Render.table("\n".join(segment[1:])),
                    open=True,
                ),
            )
            for segment in segments
        ]

    async def ainvoke(  # type: ignore
        self, message, conv_id: str, history: list, **kwargs  # type: ignore
    ) -> Document:
        """Run the agent once and report its answer and citation segments.

        The answer text is reported on the "chat" channel, followed by every
        document from ``prepare_citation`` and a final ``None``. The agent is
        always called with ``use_citation=True`` here.
        """
        result = self.agent(message, use_citation=True)
        self.report_output(Document(content=result.text, channel="chat"))

        for citation_doc in self.prepare_citation(result):
            self.report_output(citation_doc)

        self.report_output(None)
        return result

    def stream(  # type: ignore
        self, message, conv_id: str, history: list, **kwargs  # type: ignore
    ) -> Generator[Document, None, Document] | None:
        """Stream the agent's intermediate steps and final answer.

        When ``use_rewrite`` is set, the message is first rewritten by
        ``rewrite_pipeline`` and the rewritten text is announced on the "info"
        channel. Planner and worker logs are then yielded on the "info"
        channel as they arrive, answer text on the "chat" channel. Finally a
        document with ``content=None`` is yielded on the "info" channel,
        followed by the segments from ``prepare_citation``.

        Returns:
            The value returned by the agent's stream generator.
        """
        if self.use_rewrite:
            rewrite = self.rewrite_pipeline(question=message)
            message = rewrite.text
            yield Document(
                channel="info",
                content=f"Rewrote the message to: {rewrite.text}",
            )

        output_stream = GeneratorWrapper(
            self.agent.stream(message, use_citation=self.enable_citation)
        )
        for item in output_stream:
            if item.intermediate_steps:
                for step in item.intermediate_steps:
                    if "planner_log" in step:
                        panel = self.format_info_panel_planner(step["planner_log"])
                    else:
                        panel = self.format_info_panel_evidence(step["worker_log"])
                        if panel is None:
                            # a worker log without a #Plan line yields no
                            # panel; emitting content=None here would be the
                            # same document that is used below to reset the
                            # info channel, so skip it
                            continue
                    yield Document(channel="info", content=panel)
            if item.text:
                # final answer
                yield Document(channel="chat", content=item.text)

        answer = output_stream.value
        yield Document(channel="info", content=None)
        yield from self.prepare_citation(answer)

        return answer

    @classmethod
    def get_pipeline(
        cls, settings: dict, states: dict, retrievers: list | None = None
    ) -> BaseReasoning:
        """Build a ReWOO pipeline configured from the user settings.

        Args:
            settings: flat settings mapping; options of this reasoning are
                read under ``reasoning.options.<id>.``
            states: application state; ``states["app"]["regen"]`` turns on
                question rewriting
            retrievers: retrievers handed to the pipeline and to the
                "SearchDoc" tool

        Returns:
            The configured pipeline.

        Raises:
            KeyError: if a required setting is missing or a selected built-in
                tool name is not in ``TOOL_REGISTRY``.
        """
        _id = cls.get_info()["id"]
        prefix = f"reasoning.options.{_id}"
        pipeline = RewooAgentPipeline(retrievers=retrievers)

        max_context_length_setting = settings.get("reasoning.max_context_length", None)

        planner_llm_name = settings[f"{prefix}.planner_llm"]
        planner_llm = llms.get(planner_llm_name, llms.get_default())
        solver_llm_name = settings[f"{prefix}.solver_llm"]
        solver_llm = llms.get(solver_llm_name, llms.get_default())

        pipeline.agent.planner_llm = planner_llm
        pipeline.agent.solver_llm = solver_llm
        if max_context_length_setting:
            # split the context budget across the agent steps
            pipeline.agent.max_context_length = (
                max_context_length_setting // DEFAULT_AGENT_STEPS
            )

        tools = []
        for tool_name in settings[f"{prefix}.tools"]:
            if tool_name.startswith("[MCP] "):
                server_name = tool_name[len("[MCP] ") :]
                entry = mcp_manager.get(server_name)
                if entry:
                    # work on a copy: popping from the manager's own dict
                    # would drop "enabled_tools" for every later build
                    config = dict(entry["config"])
                    enabled_tools = config.pop("enabled_tools", None)
                    mcp_tools = create_tools_from_config(config, enabled_tools)
                    tools.extend(mcp_tools)
            else:
                # NOTE: these are the shared TOOL_REGISTRY instances, so the
                # attribute assignments below are visible to every user of
                # the registry
                tool = TOOL_REGISTRY[tool_name]
                if tool_name == "SearchDoc":
                    tool.retrievers = retrievers
                elif tool_name == "LLM":
                    tool.llm = solver_llm
                tools.append(tool)
        pipeline.agent.plugins = tools
        pipeline.agent.output_lang = SUPPORTED_LANGUAGE_MAP.get(
            settings["reasoning.lang"], "English"
        )
        pipeline.agent.prompt_template["Planner"] = PromptTemplate(
            settings[f"{prefix}.planner_prompt"]
        )
        pipeline.agent.prompt_template["Solver"] = PromptTemplate(
            settings[f"{prefix}.solver_prompt"]
        )

        pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
        pipeline.use_rewrite = states.get("app", {}).get("regen", False)
        pipeline.rewrite_pipeline.llm = (
            planner_llm  # TODO: separate llm for rewrite if needed
        )

        return pipeline

    @classmethod
    def get_user_settings(cls) -> dict:
        """Describe the user-configurable options of the ReWOO reasoning.

        LLM and MCP tool choices are looked up at call time; a failure in
        either lookup is logged and the corresponding choices are left at
        their built-in values.
        """
        llm_choices = [("(default)", "")]
        try:
            llm_choices += [(name, name) for name in llms.options().keys()]
        except Exception as exc:
            logger.exception(f"Failed to get LLM options: {exc}")

        tool_choices = ["Wikipedia", "Google", "LLM", "SearchDoc"]
        try:
            tool_choices += mcp_manager.get_enabled_tools()
        except Exception as exc:
            logger.exception(f"Failed to get MCP tool options: {exc}")

        def llm_dropdown(label, info):
            # both LLM selectors share the same shape and the same choices
            return {
                "name": label,
                "value": "",
                "component": "dropdown",
                "choices": llm_choices,
                "special_type": "llm",
                "info": info,
            }

        user_settings = {}
        user_settings["planner_llm"] = llm_dropdown(
            "Language model for Planner",
            "The language model to use for planning. "
            "This model will generate a plan based on the "
            "instruction to find the answer.",
        )
        user_settings["solver_llm"] = llm_dropdown(
            "Language model for Solver",
            "The language model to use for solving. "
            "This model will generate the answer based on the "
            "plan generated by the planner and evidences found by the tools.",
        )
        user_settings["highlight_citation"] = {
            "name": "Highlight Citation",
            "value": False,
            "component": "checkbox",
        }
        user_settings["tools"] = {
            "name": "Tools for knowledge retrieval",
            "value": ["SearchDoc", "LLM"],
            "component": "checkboxgroup",
            "choices": tool_choices,
        }
        user_settings["planner_prompt"] = {
            "name": "Planner Prompt",
            "value": DEFAULT_PLANNER_PROMPT,
        }
        user_settings["solver_prompt"] = {
            "name": "Solver Prompt",
            "value": DEFAULT_SOLVER_PROMPT,
        }
        return user_settings

    @classmethod
    def get_info(cls) -> dict:
        return {
            "id": "ReWOO",
            "name": "ReWOO Agent",
            "description": (
                "Implementing ReWOO paradigm: https://arxiv.org/abs/2305.18323. "
                "The ReWOO agent makes a step by step plan in the first stage, "
                "then solves each step in the second stage. The agent can use "
                "external tools to help in the reasoning process. Once all stages "
                "are completed, the agent will summarize the answer."
            ),
        }


================================================
FILE: libs/ktem/ktem/reasoning/simple.py
================================================
import logging
import threading
from textwrap import dedent
from typing import Generator

from decouple import config
from ktem.embeddings.manager import embedding_models_manager as embeddings
from ktem.llms.manager import llms
from ktem.reasoning.prompt_optimization import (
    DecomposeQuestionPipeline,
    RewriteQuestionPipeline,
)
from ktem.utils.render import Render
from ktem.utils.visualize_cited import CreateCitationVizPipeline
from plotly.io import to_json

from kotaemon.base import (
    AIMessage,
    BaseComponent,
    Document,
    HumanMessage,
    Node,
    RetrievedDocument,
    SystemMessage,
)
from kotaemon.indices.qa.citation_qa import (
    CONTEXT_RELEVANT_WARNING_SCORE,
    DEFAULT_QA_TEXT_PROMPT,
    AnswerWithContextPipeline,
)
from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
from kotaemon.indices.qa.utils import replace_think_tag_with_details
from kotaemon.llms import ChatLLM

from ..utils import SUPPORTED_LANGUAGE_MAP
from .base import BaseReasoning

logger = logging.getLogger(__name__)


class AddQueryContextPipeline(BaseComponent):
    """Turn a chat question into a search query using the conversation history.

    Args:
        n_last_interactions: number of most recent (human, ai) turns from the
            history that are shown to the model
        llm: the chat model that writes the search query
    """

    n_last_interactions: int = 5
    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())

    def run(self, question: str, history: list) -> Document:
        """Generate the search query for ``question``.

        Args:
            question: the new user question
            history: sequence of (human, ai) message pairs

        Returns:
            A Document whose content is an empty string when the model
            answers "0" (no search needed), the original question when it
            answers "1" (question is self-contained), and the model's reply
            otherwise.
        """
        messages = [
            SystemMessage(
                content="Below is a history of the conversation so far, and a new "
                "question asked by the user that needs to be answered by searching "
                "in a knowledge base.\nYou have access to a Search index "
                "with 100's of documents.\nGenerate a search query based on the "
                "conversation and the new question.\nDo not include cited source "
                "filenames and document names e.g info.txt or doc.pdf in the search "
                "query terms.\nDo not include any text inside [] or <<>> in the "
                "search query terms.\nDo not include any special characters like "
                "'+'.\nIf the question is not in English, rewrite the query in "
                "the language used in the question.\n If the question contains enough "
                "information, return just the number 1\n If it's unnecessary to do "
                "the searching, return just the number 0."
            ),
            # two worked examples that show the expected reply format
            HumanMessage(content="How did crypto do last year?"),
            AIMessage(
                content="Summarize Cryptocurrency Market Dynamics from last year"
            ),
            HumanMessage(content="What are my health plans?"),
            AIMessage(content="Show available health plans"),
        ]
        for human, ai in history[-self.n_last_interactions :]:
            messages.append(HumanMessage(content=human))
            messages.append(AIMessage(content=ai))

        messages.append(HumanMessage(content=f"Generate search query for: {question}"))

        resp = self.llm(messages).text
        # compare the sentinel on the stripped reply: models often pad the
        # single digit with whitespace or a trailing newline
        verdict = resp.strip() if isinstance(resp, str) else resp
        if verdict == "0":
            return Document(content="")

        if verdict == "1":
            return Document(content=question)

        return Document(content=resp)


class FullQAPipeline(BaseReasoning):
    """Question answering pipeline. Handle from question to answer"""

    class Config:
        allow_extra = True

    # configuration parameters
    trigger_context: int = 150
    use_rewrite: bool = False

    retrievers: list[BaseComponent]

    evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx()
    answering_pipeline: AnswerWithContextPipeline
    rewrite_pipeline: RewriteQuestionPipeline | None = None
    create_citation_viz_pipeline: CreateCitationVizPipeline = Node(
        default_callback=lambda _: CreateCitationVizPipeline(
            embedding=embeddings.get_default()
        )
    )
    add_query_context: AddQueryContextPipeline = AddQueryContextPipeline.withx()

    def retrieve(
        self, message: str, history: list
    ) -> tuple[list[RetrievedDocument], list[Document]]:
        """Run every retriever on the message and gather what they return.

        The message itself is used as the query. Conversation-aware query
        rewriting through ``add_query_context`` is currently switched off, so
        ``history`` is not consulted.

        Returns:
            A pair ``(docs, info)``: the retrieved text documents with
            duplicated ``doc_id`` dropped, and the documents to display
            ("info" entries for the text documents followed by "plot" entries
            for documents whose metadata type is "plot").
        """
        # TODO: previously return [], [] because we think this message as something
        # like "Hello", "I need help"...
        query = message

        text_docs, seen_ids, plot_docs = [], [], []
        for position, retriever in enumerate(self.retrievers):
            retriever_node = self._prepare_child(retriever, f"retriever_{position}")

            for doc in retriever_node(text=query):
                if doc.metadata.get("type", "") == "plot":
                    plot_docs.append(doc)
                elif doc.doc_id not in seen_ids:
                    text_docs.append(doc)
                    seen_ids.append(doc.doc_id)

        info = [
            Document(
                channel="info",
                content=Render.collapsible_with_header(doc, open_collapsible=True),
            )
            for doc in text_docs
        ]
        info.extend(
            Document(channel="plot", content=doc.metadata.get("data", ""))
            for doc in plot_docs
        )

        return text_docs, info

    def prepare_mindmap(self, answer) -> Document | None:
        mindmap = answer.metadata["mindmap"]
        if mindmap:
            mindmap_text = mindmap.text
            mindmap_svg = dedent(
                """
                <div class="markmap">
                <script type="text/template">
                ---
                markmap:
                    colorFreezeLevel: 2
                    activeNode:
                        placement: center
                    initialExpandLevel: 4
                    maxWidth: 200
                ---
                {}
                </script>
                </div>
                """
            ).format(mindmap_text)

            mindmap_content = Document(
                channel="info",
                content=Render.collapsible(
                    header="""
                    <i>Mindmap</i>
                    <a href="#" id='mindmap-toggle'>
                        [Expand]</a>
                    <a href="#" id='mindmap-export'>
                        [Export]</a>""",
                    content=mindmap_svg,
                    open=True,
                ),
            )
        else:
            mindmap_content = None

        return mindmap_content

    def prepare_citation_viz(self, answer, question, docs) -> Document | None:
        doc_texts = [doc.text for doc in docs]
        citation_plot = None
        plot_content = None

        if answer.metadata["citation_viz"] and len(docs) > 1:
            try:
                citation_plot = self.create_citation_viz_pipeline(doc_texts, question)
            except Exception as e:
                print("Failed to create citation plot:", e)

            if citation_plot:
                plot = to_json(citation_plot)
                plot_content = Document(channel="plot", content=plot)

        return plot_content

    def show_citations_and_addons(self, answer, docs, question):
        """Yield the evidence and the optional add-ons for the info panel.

        Yields a "No evidence found." notice when the answering pipeline
        returns no citation at all. Otherwise the previous info is cleared
        and, in order, the mindmap, the citation plot, a low-relevance
        warning, the answer confidence and the evidences are yielded, each
        only when available.

        Args:
            answer: the answer produced by the answering pipeline
            docs: the retrieved documents the answer was based on
            question: the question, used for the citation visualisation
        """
        # show the evidence
        with_citation, without_citation = self.answering_pipeline.prepare_citations(
            answer, docs
        )
        mindmap_output = self.prepare_mindmap(answer)
        citation_plot_output = self.prepare_citation_viz(answer, question, docs)

        if not with_citation and not without_citation:
            yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
            return

        # `default` keeps max() from raising on an empty document list
        max_llm_rerank_score = max(
            (doc.metadata.get("llm_trulens_score", 0.0) for doc in docs),
            default=0.0,
        )
        has_llm_score = any("llm_trulens_score" in doc.metadata for doc in docs)
        # clear previous info
        yield Document(channel="info", content=None)

        # yield mindmap output
        if mindmap_output:
            yield mindmap_output

        # yield citation plot output
        if citation_plot_output:
            yield citation_plot_output

        # yield warning message
        if has_llm_score and max_llm_rerank_score < CONTEXT_RELEVANT_WARNING_SCORE:
            yield Document(
                channel="info",
                content=(
                    "<h5>WARNING! Context relevance score is low. "
                    "Double check the model answer for correctness.</h5>"
                ),
            )

        # show QA score; a missing or zero score is not displayed
        qa_score = (
            round(answer.metadata["qa_score"], 2)
            if answer.metadata.get("qa_score")
            else None
        )
        if qa_score:
            yield Document(
                channel="info",
                content=f"<h5>Answer confidence: {qa_score}</h5>",
            )

        yield from with_citation
        if without_citation:
            yield from without_citation

    async def ainvoke(  # type: ignore
        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore
    ) -> Document:  # type: ignore
        """Asynchronous invocation is not implemented for this pipeline.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def stream(  # type: ignore
        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore
    ) -> Generator[Document, None, Document]:
        """Answer the message, streaming retrieval info, answer and evidence.

        Steps: optionally rewrite the message, retrieve documents, prepare the
        evidence, stream the answer while relevance scores are computed on a
        background thread, then yield the citations and add-ons.

        Args:
            message: the user question
            conv_id: conversation id, forwarded to the answering pipeline
            history: conversation history, forwarded to retrieval and to the
                answering pipeline
            **kwargs: forwarded to the answering pipeline

        Returns:
            The answer returned by the answering pipeline's stream.
        """
        # rewriting happens only when it is both enabled and configured
        if self.use_rewrite and self.rewrite_pipeline:
            print("Chosen rewrite pipeline", self.rewrite_pipeline)
            message = self.rewrite_pipeline(question=message).text
            print("Rewrite result", message)

        print(f"Retrievers {self.retrievers}")
        # should populate the context
        docs, infos = self.retrieve(message, history)
        print(f"Got {len(docs)} retrieved documents")
        yield from infos

        evidence_mode, evidence, images = self.evidence_pipeline(docs).content

        def generate_relevant_scores():
            # rebinds the enclosing `docs` with whatever the first retriever
            # returns from its scoring call
            nonlocal docs
            docs = self.retrievers[0].generate_relevant_scores(message, docs)

        # compute the relevance scores on a separate thread so that it
        # overlaps with answer generation; only the first retriever is used
        if evidence and self.retrievers:
            scoring_thread = threading.Thread(target=generate_relevant_scores)
            scoring_thread.start()
        else:
            scoring_thread = None

        answer = yield from self.answering_pipeline.stream(
            question=message,
            history=history,
            evidence=evidence,
            evidence_mode=evidence_mode,
            images=images,
            conv_id=conv_id,
            **kwargs,
        )

        # check <think> tag from reasoning models
        processed_answer = replace_think_tag_with_details(answer.text)
        if processed_answer != answer.text:
            # clear the chat message and render again
            yield Document(channel="chat", content=None)
            yield Document(channel="chat", content=processed_answer)

        # wait for the scoring thread so that `docs` is final before the
        # evidence is shown
        if scoring_thread:
            scoring_thread.join()

        yield from self.show_citations_and_addons(answer, docs, message)

        return answer

    @classmethod
    def prepare_pipeline_instance(cls, settings, retrievers):
        """Instantiate the pipeline with the retrievers and no rewrite step.

        ``settings`` is accepted but not used by this implementation.
        """
        init_kwargs = {"retrievers": retrievers, "rewrite_pipeline": None}
        return cls(**init_kwargs)

    @classmethod
    def get_pipeline(cls, settings, states, retrievers):
        """Get the reasoning pipeline

        Args:
            settings: the settings for the pipeline
            states: application state; ``states["app"]["regen"]`` turns on
                question rewriting
            retrievers: the retrievers to use
        """
        prefix = f"reasoning.options.{cls.get_info()['id']}"

        def option(key):
            # options of this reasoning live under its own settings prefix
            return settings[f"{prefix}.{key}"]

        context_budget = settings.get("reasoning.max_context_length", 32000)

        pipeline = cls.prepare_pipeline_instance(settings, retrievers)

        llm = llms.get(settings.get(f"{prefix}.llm", None), llms.get_default())

        # prepare evidence pipeline configuration
        pipeline.evidence_pipeline.max_context_length = context_budget

        # answering pipeline configuration
        citation_mode = option("highlight_citation")
        if citation_mode == "inline":
            answer_pipeline = pipeline.answering_pipeline = AnswerWithInlineCitation()
        else:
            answer_pipeline = pipeline.answering_pipeline = AnswerWithContextPipeline()

        history_window = option("n_last_interactions")

        answer_pipeline.llm = llm
        answer_pipeline.citation_pipeline.llm = llm
        answer_pipeline.n_last_interactions = history_window
        answer_pipeline.enable_citation = citation_mode != "off"
        answer_pipeline.enable_mindmap = option("create_mindmap")
        answer_pipeline.enable_citation_viz = option("create_citation_viz")
        answer_pipeline.use_multimodal = option("use_multimodal")
        answer_pipeline.system_prompt = option("system_prompt")
        answer_pipeline.qa_template = option("qa_prompt")
        answer_lang = SUPPORTED_LANGUAGE_MAP.get(settings["reasoning.lang"], "English")
        answer_pipeline.lang = answer_lang

        pipeline.add_query_context.llm = llm
        pipeline.add_query_context.n_last_interactions = history_window

        pipeline.trigger_context = option("trigger_context")
        pipeline.use_rewrite = states.get("app", {}).get("regen", False)

        rewriter = pipeline.rewrite_pipeline
        if rewriter:
            rewriter.llm = llm
            rewriter.lang = answer_lang
        return pipeline

    @classmethod
    def get_user_settings(cls) -> dict:
        """Describe the user-configurable options of this reasoning.

        The LLM choices are looked up at call time; a failing lookup is
        logged and only the "(default)" choice is offered. The default
        citation style is "off" when the USE_LOW_LLM_REQUESTS config flag is
        set and "highlight" otherwise.
        """
        from ktem.llms.manager import llms

        choices = [("(default)", "")]
        try:
            choices += [(name, name) for name in llms.options().keys()]
        except Exception as exc:
            logger.exception(f"Failed to get LLM options: {exc}")

        def unchecked_box(label):
            # every on/off option starts disabled
            return {"name": label, "value": False, "component": "checkbox"}

        user_settings = {}
        user_settings["llm"] = {
            "name": "Language model",
            "value": "",
            "component": "dropdown",
            "choices": choices,
            "special_type": "llm",
            "info": (
                "The language model to use for generating the answer. If None, "
                "the application default language model will be used."
            ),
        }

        low_request_mode = config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
        user_settings["highlight_citation"] = {
            "name": "Citation style",
            "value": "off" if low_request_mode else "highlight",
            "component": "radio",
            "choices": [
                ("citation: highlight", "highlight"),
                ("citation: inline", "inline"),
                ("no citation", "off"),
            ],
        }
        user_settings["create_mindmap"] = unchecked_box("Create Mindmap")
        user_settings["create_citation_viz"] = unchecked_box(
            "Create Embeddings Visualization"
        )
        user_settings["use_multimodal"] = unchecked_box("Use Multimodal Input")
        user_settings["system_prompt"] = {
            "name": "System Prompt",
            "value": "This is a question answering system.",
        }
        user_settings["qa_prompt"] = {
            "name": "QA Prompt (contains {context}, {question}, {lang})",
            "value": DEFAULT_QA_TEXT_PROMPT,
        }
        user_settings["n_last_interactions"] = {
            "name": "Number of interactions to include",
            "value": 5,
            "component": "number",
            "info": "The maximum number of chat interactions to include in the LLM",
        }
        user_settings["trigger_context"] = {
            "name": "Maximum message length for context rewriting",
            "value": 150,
            "component": "number",
            "info": (
                "The maximum length of the message to trigger context addition. "
                "Exceeding this length, the message will be used as is."
            ),
        }
        return user_settings

    @classmethod
    def get_info(cls) -> dict:
        return {
            "id": "simple",
            "name": "Simple QA",
            "description": (
                "Simple RAG-based question answering pipeline. This pipeline can "
                "perform both keyword search and similarity search to retrieve the "
                "context. After that it includes that context to generate the answer."
            ),
        }


class FullDecomposeQAPipeline(FullQAPipeline):
    def answer_sub_questions(
        self, messages: list, conv_id: str, history: list, **kwargs
    ):
        """Answer each sub-question in turn, streaming the intermediate output.

        For every sub-question a heading is yielded on the "chat" channel,
        then the retrieval info, then whatever the answering pipeline
        streams.

        Returns:
            A text transcript of all sub-questions with their answers.
        """
        transcript = []
        for number, sub_question in enumerate(messages, start=1):
            yield Document(
                channel="chat",
                content=f"<br><b>Sub-question {number}</b>"
                f"<br>{sub_question}<br><b>Answer</b><br>",
            )
            # should populate the context
            docs, infos = self.retrieve(sub_question, history)
            print(f"Got {len(docs)} retrieved documents")

            yield from infos

            evidence_mode, evidence, images = self.evidence_pipeline(docs).content
            sub_answer = yield from self.answering_pipeline.stream(
                question=sub_question,
                history=history,
                evidence=evidence,
                evidence_mode=evidence_mode,
                images=images,
                conv_id=conv_id,
                **kwargs,
            )

            transcript.append(
                f"Sub-question {number}-th: '{sub_question}'\n"
                f"Answer: '{sub_answer.text}'\n\n"
            )

        return "".join(transcript)

    def stream(  # type: ignore
        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore
    ) -> Generator[Document, None, Document]:
        """Answer the message, optionally through decomposed sub-questions.

        The rewrite pipeline may return a single Document (the message is
        replaced by its text) or a non-empty list of Documents (each one is
        answered as a sub-question). The sub-question transcript is appended
        to the evidence of the main question.

        Returns:
            The answer to the main question.
        """
        sub_answers = ""
        if self.rewrite_pipeline:
            print("Chosen rewrite pipeline", self.rewrite_pipeline)
            rewritten = self.rewrite_pipeline(question=message)
            print("Rewrite result", rewritten)

            if isinstance(rewritten, Document):
                message = rewritten.text
            elif (
                isinstance(rewritten, list)
                and rewritten
                and isinstance(rewritten[0], Document)
            ):
                yield Document(
                    channel="chat",
                    content="<h4>Sub questions and their answers</h4>",
                )
                sub_questions = [part.text for part in rewritten]
                sub_answers = yield from self.answer_sub_questions(
                    sub_questions, conv_id, history, **kwargs
                )

        yield Document(
            channel="chat",
            content=f"<h4>Main question</h4>{message}<br><b>Answer</b><br>",
        )

        # should populate the context
        docs, infos = self.retrieve(message, history)
        print(f"Got {len(docs)} retrieved documents")
        yield from infos

        evidence_mode, evidence, images = self.evidence_pipeline(docs).content
        combined_evidence = evidence + "\n" + sub_answers
        answer = yield from self.answering_pipeline.stream(
            question=message,
            history=history,
            evidence=combined_evidence,
            evidence_mode=evidence_mode,
            images=images,
            conv_id=conv_id,
            **kwargs,
        )

        # show the evidence
        cited, uncited = self.answering_pipeline.prepare_citations(answer, docs)
        if cited or uncited:
            yield Document(channel="info", content=None)
            yield from cited
            yield from uncited
        else:
            yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")

        return answer

    @classmethod
    def get_user_settings(cls) -> dict:
        """Extend the parent's settings with the decompose prompt option."""
        decompose_option = {
            "name": "Decompose Prompt",
            "value": DecomposeQuestionPipeline.DECOMPOSE_SYSTEM_PROMPT_TEMPLATE,
        }
        inherited = super().get_user_settings()
        inherited["decompose_prompt"] = decompose_option
        return inherited

    @classmethod
    def prepare_pipeline_instance(cls, settings, retrievers):
        """Instantiate the pipeline with a question-decomposing rewrite step.

        The decompose prompt is read from this reasoning's settings; a missing
        entry is passed on as None.
        """
        option_key = f"reasoning.options.{cls.get_info()['id']}.decompose_prompt"
        decomposer = DecomposeQuestionPipeline(
            prompt_template=settings.get(option_key)
        )
        return cls(retrievers=retrievers, rewrite_pipeline=decomposer)

    @classmethod
    def get_info(cls) -> dict:
        return {
            "id": "complex",
            "name": "Complex QA",
            "description": (
                "Use multi-step reasoning to decompose a complex question into "
                "multiple sub-questions. This pipeline can "
                "perform both keyword search and similarity search to retrieve the "
                "context. After that it includes that context to generate the answer."
            ),
        }


================================================
FILE: libs/ktem/ktem/rerankings/__init__.py
================================================


================================================
FILE: libs/ktem/ktem/rerankings/db.py
================================================
from typing import Type

from ktem.db.engine import engine
from sqlalchemy import JSON, Boolean, Column, String
from sqlalchemy.orm import DeclarativeBase
from theflow.settings import settings as flowsettings
from theflow.utils.modules import import_dotted_string


class Base(DeclarativeBase):
    """Declarative base for the reranking tables defined in this module."""


class BaseRerankingTable(Base):
    """Abstract base table storing reranking model definitions.

    Marked ``__abstract__`` so no table is created for this class itself;
    concrete subclasses supply ``__tablename__``.
    """

    __abstract__ = True

    # Model name; serves as the primary key.
    name = Column(String, primary_key=True, unique=True)
    # JSON spec of the model (the manager deserializes it into a model object).
    spec = Column(JSON, default={})
    # Whether this entry is flagged as the default model.
    default = Column(Boolean, default=False)


# Pick the base table: a deployment may override it through the
# `KH_TABLE_RERANKING` flow setting (a dotted import path); otherwise fall
# back to the built-in `BaseRerankingTable`.
if hasattr(flowsettings, "KH_TABLE_RERANKING"):
    __base_reranking: Type[BaseRerankingTable] = import_dotted_string(
        flowsettings.KH_TABLE_RERANKING, safe=False
    )
else:
    __base_reranking = BaseRerankingTable


class RerankingTable(__base_reranking):  # type: ignore
    """Concrete reranking table, stored under the name ``reranking``."""

    __tablename__ = "reranking"


# Create the table at import time unless `KH_ENABLE_ALEMBIC` is set to a
# truthy value in the flow settings.
if not getattr(flowsettings, "KH_ENABLE_ALEMBIC", False):
    RerankingTable.metadata.create_all(engine)


================================================
FILE: libs/ktem/ktem/rerankings/manager.py
================================================
from typing import Optional, Type

from sqlalchemy import select
from sqlalchemy.orm import Session
from theflow.settings import settings as flowsettings
from theflow.utils.modules import deserialize

from kotaemon.rerankings.base import BaseReranking

from .db import RerankingTable, engine


class RerankingManager:
    """Represent a pool of reranking models backed by the reranking table.

    The manager keeps an in-memory view (`_models`, `_info`, `_default`) that
    is rebuilt from the database by `load()` after every mutation.
    """

    def __init__(self):
        """Seed the table from `KH_RERANKINGS` if it is empty, then load it."""
        self._models: dict[str, BaseReranking] = {}
        self._info: dict[str, dict] = {}
        self._default: str = ""
        self._vendors: list[Type] = []

        # populate the pool if empty
        if hasattr(flowsettings, "KH_RERANKINGS"):
            with Session(engine) as sess:
                count = sess.query(RerankingTable).count()
            if not count:
                for name, model in flowsettings.KH_RERANKINGS.items():
                    self.add(
                        name=name,
                        spec=model["spec"],
                        default=model.get("default", False),
                    )

        self.load()
        self.load_vendors()

    def load(self):
        """Load the model pool from database"""
        self._models, self._info, self._default = {}, {}, ""
        with Session(engine) as sess:
            stmt = select(RerankingTable)
            items = sess.execute(stmt)

            for (item,) in items:
                self._models[item.name] = deserialize(item.spec, safe=False)
                self._info[item.name] = {
                    "name": item.name,
                    "spec": item.spec,
                    "default": item.default,
                }
                if item.default:
                    self._default = item.name

    def load_vendors(self):
        """Register the reranking vendor classes offered by the manager."""
        from kotaemon.rerankings import (
            CohereReranking,
            TeiFastReranking,
            VoyageAIReranking,
        )

        self._vendors = [TeiFastReranking, CohereReranking, VoyageAIReranking]

    def __getitem__(self, key: str) -> BaseReranking:
        """Get model by name"""
        return self._models[key]

    def __contains__(self, key: str) -> bool:
        """Check if model exists"""
        return key in self._models

    def get(
        self, key: str, default: Optional[BaseReranking] = None
    ) -> Optional[BaseReranking]:
        """Get model by name with default value"""
        return self._models.get(key, default)

    def settings(self) -> dict:
        """Present model pools option for gradio"""
        return {
            "label": "Reranking",
            "choices": list(self._models.keys()),
            "value": self.get_default_name(),
        }

    def options(self) -> dict:
        """Present a dict of models"""
        return self._models

    def get_random_name(self) -> str:
        """Get the name of random model

        Returns:
            str: random model name in the pool

        Raises:
            ValueError: if the pool is empty
        """
        import random

        if not self._models:
            raise ValueError("No models in pool")

        return random.choice(list(self._models.keys()))

    def get_default_name(self) -> str:
        """Get the name of default model

        In case there is no default model, choose random model from pool. If
        several rows are flagged as default, the one loaded last wins.

        Returns:
            str: model name

        Raises:
            ValueError: if the pool is empty
        """
        if not self._models:
            raise ValueError("No models in pool")

        if not self._default:
            return self.get_random_name()

        return self._default

    def get_random(self) -> BaseReranking:
        """Get random model"""
        return self._models[self.get_random_name()]

    def get_default(self) -> BaseReranking:
        """Get default model

        In case there is no default model, choose random model from pool.

        Returns:
            BaseReranking: model
        """
        return self._models[self.get_default_name()]

    def info(self) -> dict:
        """List all models"""
        return self._info

    def add(self, name: str, spec: dict, default: bool):
        """Add a model to the pool and reload the in-memory view.

        Args:
            name: unique, non-empty model name
            spec: serialized model specification
            default: whether the new model becomes the default one

        Raises:
            ValueError: if the name is empty or the insert fails
        """
        if not name:
            raise ValueError("Name must not be empty")

        try:
            with Session(engine) as sess:
                if default:
                    # Clear the old default inside the same transaction as the
                    # insert, so a failed insert does not leave the pool
                    # without its previous default.
                    sess.query(RerankingTable).update({"default": False})

                item = RerankingTable(name=name, spec=spec, default=default)
                sess.add(item)
                sess.commit()
        except Exception as e:
            raise ValueError(f"Failed to add model {name}: {e}") from e

        self.load()

    def delete(self, name: str):
        """Delete a model from the pool

        Raises:
            ValueError: if the model does not exist or the delete fails
        """
        try:
            with Session(engine) as sess:
                item = sess.query(RerankingTable).filter_by(name=name).first()
                if item is None:
                    raise ValueError(f"Model {name} not found")
                sess.delete(item)
                sess.commit()
        except Exception as e:
            raise ValueError(f"Failed to delete model {name}: {e}") from e

        self.load()

    def update(self, name: str, spec: dict, default: bool, new_name: str = ""):
        """Update a model in the pool, optionally renaming it.

        All changes (clearing the previous default, renaming, updating the
        spec) are committed in one transaction; on failure nothing is changed.

        Args:
            name: current name of the model
            spec: new serialized model specification
            default: whether the model becomes the default one
            new_name: new name for the model; empty or equal to `name` means
                no rename

        Raises:
            ValueError: if the name is empty, the new name is already taken,
                the model does not exist, or the database update fails
        """
        if not name:
            raise ValueError("Name must not be empty")

        renaming = bool(new_name) and new_name != name
        if renaming and new_name in self._info:
            raise ValueError(f"Model '{new_name}' already exists. Use a unique name.")

        try:
            with Session(engine) as sess:
                if default:
                    # turn all models to non-default (committed together with
                    # the change below, rolled back if anything fails)
                    sess.query(RerankingTable).update({"default": False})

                item = sess.query(RerankingTable).filter_by(name=name).first()
                if not item:
                    raise ValueError(f"Model {name} not found")

                if renaming:
                    # `name` is the primary key: replace the row instead of
                    # mutating the key in place.
                    sess.delete(item)
                    sess.add(RerankingTable(name=new_name, spec=spec, default=default))
                else:
                    item.spec = spec
                    item.default = default
                sess.commit()
        except Exception as e:
            raise ValueError(f"Failed to update model {name}: {e}") from e

        self.load()

    def vendors(self) -> dict:
        """Return a mapping of vendor class name to vendor class"""
        return {vendor.__qualname__: vendor for vendor in self._vendors}


# Module-level manager instance shared by importers; constructing it queries
# the database and loads the model pool at import time.
reranking_models_manager = RerankingManager()


================================================
FILE: libs/ktem/ktem/rerankings/ui.py
================================================
from copy import deepcopy

import gradio as gr
import pandas as pd
import yaml
from ktem.app import BasePage
from ktem.utils.file import YAMLNoDateSafeLoader
from theflow.utils.modules import deserialize

from kotaemon.base import Document

from .manager import reranking_models_manager


def format_description(cls):
    """Render a component's docstring and parameter table as Markdown.

    Args:
        cls: a class exposing ``describe()`` that returns a dict with a
            ``params`` mapping; each param entry must provide
            ``auto_callback``, ``type`` and ``help``.

    Returns:
        str: ``cls.__doc__``, a blank line, then a Markdown table with one row
        per parameter. Parameters whose ``auto_callback`` is a string are
        left out of the table.
    """
    table = ["| Name | Type | Description |", "| --- | --- | --- |"]
    for param_name, meta in cls.describe()["params"].items():
        if not isinstance(meta["auto_callback"], str):
            table.append(f"| {param_name} | {meta['type']} | {meta['help']} |")
    body = "\n".join(table)
    return f"{cls.__doc__}\n\n{body}"


class RerankingManagement(BasePage):
    """Gradio page to view, add, edit, test and delete reranking models."""

    def __init__(self, app):
        self._app = app
        self.spec_desc_default = (
            "# Spec description\n\nSelect a model to view the spec description."
        )
        self.on_building_ui()

    def on_building_ui(self):
        """Build the "View" tab (list + edit panel) and the "Add" tab."""
        with gr.Tab(label="View"):
            self.rerank_list = gr.DataFrame(
                headers=["name", "vendor", "default"],
                interactive=False,
                column_widths=[30, 40, 30],
            )

            with gr.Column(visible=False) as self._selected_panel:
                self.selected_rerank_name = gr.Textbox(value="", visible=False)
                with gr.Row():
                    with gr.Column():
                        self.edit_default = gr.Checkbox(
                            label="Set default",
                            info=(
                                "Set this Reranking model as default. This default "
                                "Reranking will be used by other components by default "
                                "if no Reranking is specified for such components."
                            ),
                        )
                        self.edit_name = gr.Textbox(
                            label="Name",
                            info="Edit to rename this Reranking model.",
                        )
                        self.edit_spec = gr.Textbox(
                            label="Specification",
                            info="Specification of the Reranking model in YAML format",
                            lines=10,
                        )

                        with gr.Accordion(
                            label="Test connection", visible=False, open=False
                        ) as self._check_connection_panel:
                            with gr.Row():
                                with gr.Column(scale=4):
                                    self.connection_logs = gr.HTML(
                                        "Logs",
                                    )

                                with gr.Column(scale=1):
                                    self.btn_test_connection = gr.Button("Test")

                        with gr.Row(visible=False) as self._selected_panel_btn:
                            with gr.Column():
                                self.btn_edit_save = gr.Button(
                                    "Save", min_width=10, variant="primary"
                                )
                            with gr.Column():
                                self.btn_delete = gr.Button(
                                    "Delete", min_width=10, variant="stop"
                                )
                                with gr.Row():
                                    self.btn_delete_yes = gr.Button(
                                        "Confirm Delete",
                                        variant="stop",
                                        visible=False,
                                        min_width=10,
                                    )
                                    self.btn_delete_no = gr.Button(
                                        "Cancel", visible=False, min_width=10
                                    )
                            with gr.Column():
                                self.btn_close = gr.Button("Close", min_width=10)

                    with gr.Column():
                        self.edit_spec_desc = gr.Markdown("# Spec description")

        with gr.Tab(label="Add"):
            with gr.Row():
                with gr.Column(scale=2):
                    self.name = gr.Textbox(
                        label="Name",
                        info=(
                            "Must be unique and non-empty. "
                            "The name will be used to identify the reranking model."
                        ),
                    )
                    self.rerank_choices = gr.Dropdown(
                        label="Vendors",
                        info=(
                            "Choose the vendor of the Reranking model. Each vendor "
                            "has different specification."
                        ),
                    )
                    self.spec = gr.Textbox(
                        label="Specification",
                        info="Specification of the Reranking model in YAML format.",
                    )
                    self.default = gr.Checkbox(
                        label="Set default",
                        info=(
                            "Set this Reranking model as default. This default "
                            "Reranking will be used by other components by default "
                            "if no Reranking is specified for such components."
                        ),
                    )
                    self.btn_new = gr.Button("Add", variant="primary")

                with gr.Column(scale=3):
                    self.spec_desc = gr.Markdown(self.spec_desc_default)

    def _on_app_created(self):
        """Called when the app is created"""
        self._app.app.load(
            self.list_rerankings,
            inputs=[],
            outputs=[self.rerank_list],
        )
        self._app.app.load(
            lambda: gr.update(choices=list(reranking_models_manager.vendors().keys())),
            outputs=[self.rerank_choices],
        )

    def on_rerank_vendor_change(self, vendor):
        """Return the YAML spec template and description for a vendor.

        Args:
            vendor: vendor name, a key of `reranking_models_manager.vendors()`

        Returns:
            tuple[str, str]: YAML dump of every required parameter (mapped to
            its default) and the Markdown description of the vendor.
        """
        vendor = reranking_models_manager.vendors()[vendor]

        required: dict = {}
        desc = vendor.describe()
        for key, value in desc["params"].items():
            if value.get("required", False):
                required[key] = value.get("default", None)

        # Return only after every parameter was inspected, so the template
        # lists all required params (and is still produced with no params).
        return yaml.dump(required), format_description(vendor)

    def on_register_events(self):
        """Wire the UI components to their handlers."""
        self.rerank_choices.select(
            self.on_rerank_vendor_change,
            inputs=[self.rerank_choices],
            outputs=[self.spec, self.spec_desc],
        )
        self.btn_new.click(
            self.create_rerank,
            inputs=[self.name, self.rerank_choices, self.spec, self.default],
            outputs=None,
        ).success(self.list_rerankings, inputs=[], outputs=[self.rerank_list]).success(
            lambda: ("", None, "", False, self.spec_desc_default),
            outputs=[
                self.name,
                self.rerank_choices,
                self.spec,
                self.default,
                self.spec_desc,
            ],
        )
        self.rerank_list.select(
            self.select_rerank,
            inputs=self.rerank_list,
            outputs=[self.selected_rerank_name],
            show_progress="hidden",
        )
        self.selected_rerank_name.change(
            self.on_selected_rerank_change,
            inputs=[self.selected_rerank_name],
            outputs=[
                self._selected_panel,
                self._selected_panel_btn,
                # delete section
                self.btn_delete,
                self.btn_delete_yes,
                self.btn_delete_no,
                # edit section
                self.edit_name,
                self.edit_spec,
                self.edit_spec_desc,
                self.edit_default,
            ],
            show_progress="hidden",
        ).success(lambda: gr.update(value=""), outputs=[self.connection_logs])

        self.btn_delete.click(
            self.on_btn_delete_click,
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        self.btn_delete_yes.click(
            self.delete_rerank,
            inputs=[self.selected_rerank_name],
            outputs=[self.selected_rerank_name],
            show_progress="hidden",
        ).then(
            self.list_rerankings,
            inputs=[],
            outputs=[self.rerank_list],
        )
        self.btn_delete_no.click(
            lambda: (
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False),
            ),
            inputs=[],
            outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
            show_progress="hidden",
        )
        self.btn_edit_save.click(
            self.save_rerank,
            inputs=[
                self.selected_rerank_name,
                self.edit_name,
                self.edit_default,
                self.edit_spec,
            ],
            outputs=[self.selected_rerank_name],
            show_progress="hidden",
        ).then(
            self.list_rerankings,
            inputs=[],
            outputs=[self.rerank_list],
        )
        self.btn_close.click(lambda: "", outputs=[self.selected_rerank_name])

        self.btn_test_connection.click(
            self.check_connection,
            inputs=[self.selected_rerank_name, self.edit_spec],
            outputs=[self.connection_logs],
        )

    def create_rerank(self, name, choices, spec, default):
        """Create a reranking model from the "Add" form.

        Args:
            name: model name (surrounding whitespace is stripped)
            choices: selected vendor name
            spec: model specification as YAML text
            default: whether to mark the model as default

        Raises:
            gr.Error: if the spec cannot be parsed or the model cannot be added
        """
        try:
            name = name.strip()
            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)
            spec["__type__"] = (
                reranking_models_manager.vendors()[choices].__module__
                + "."
                + reranking_models_manager.vendors()[choices].__qualname__
            )

            reranking_models_manager.add(name, spec=spec, default=default)
            gr.Info(f'Reranking model "{name}" created successfully')
        except ValueError as e:
            raise gr.Error(str(e))
        except Exception as e:
            raise gr.Error(f"Failed to create Reranking model '{name}': {e}")

    def list_rerankings(self):
        """List the Reranking models"""
        items = []
        for item in reranking_models_manager.info().values():
            record = {}
            record["name"] = item["name"]
            record["vendor"] = item["spec"].get("__type__", "-").split(".")[-1]
            record["default"] = item["default"]
            items.append(record)

        if items:
            rerank_list = pd.DataFrame.from_records(items)
        else:
            # placeholder row shown when no model is registered
            rerank_list = pd.DataFrame.from_records(
                [{"name": "-", "vendor": "-", "default": "-"}]
            )

        return rerank_list

    def select_rerank(self, rerank_list, ev: gr.SelectData):
        """Return the name of the model in the selected row, or "" if none."""
        if ev.value == "-" and ev.index[0] == 0:
            gr.Info("No reranking model is loaded. Please add first")
            return ""

        if not ev.selected:
            return ""

        return rerank_list["name"][ev.index[0]]

    def on_selected_rerank_change(self, selected_rerank_name):
        """Show or hide the edit panel and fill it for the selected model."""
        if selected_rerank_name == "":
            _selected_panel = gr.update(visible=False)
            _selected_panel_btn = gr.update(visible=False)
            btn_delete = gr.update(visible=True)
            btn_delete_yes = gr.update(visible=False)
            btn_delete_no = gr.update(visible=False)
            edit_name = gr.update(value="")
            edit_spec = gr.update(value="")
            edit_spec_desc = gr.update(value="")
            edit_default = gr.update(value=False)
        else:
            _selected_panel = gr.update(visible=True)
            _selected_panel_btn = gr.update(visible=True)
            btn_delete = gr.update(visible=True)
            btn_delete_yes = gr.update(visible=False)
            btn_delete_no = gr.update(visible=False)

            # work on a copy: `__type__` is popped so it is not shown/edited
            info = deepcopy(reranking_models_manager.info()[selected_rerank_name])
            vendor_str = info["spec"].pop("__type__", "-").split(".")[-1]
            vendor = reranking_models_manager.vendors()[vendor_str]

            edit_name = selected_rerank_name
            edit_spec = yaml.dump(info["spec"])
            edit_spec_desc = format_description(vendor)
            edit_default = info["default"]

        return (
            _selected_panel,
            _selected_panel_btn,
            btn_delete,
            btn_delete_yes,
            btn_delete_no,
            edit_name,
            edit_spec,
            edit_spec_desc,
            edit_default,
        )

    def on_btn_delete_click(self):
        """Swap the "Delete" button for the confirm/cancel buttons."""
        btn_delete = gr.update(visible=False)
        btn_delete_yes = gr.update(visible=True)
        btn_delete_no = gr.update(visible=True)

        return btn_delete, btn_delete_yes, btn_delete_no

    def check_connection(self, selected_rerank_name, selected_spec):
        """Test a reranking model with the (possibly edited) spec.

        Yields the accumulated HTML log after each step so the UI can show
        progress; failures are reported in the log instead of being raised.
        """
        log_content: str = ""
        try:
            log_content += f"- Testing model: {selected_rerank_name}<br>"
            yield log_content

            # Parse content & init model
            info = deepcopy(reranking_models_manager.info()[selected_rerank_name])

            # Parse content & create dummy response
            spec = yaml.load(selected_spec, Loader=YAMLNoDateSafeLoader)
            info["spec"].update(spec)

            rerank = deserialize(info["spec"], safe=False)

            if rerank is None:
                raise Exception(f"Can not found model: {selected_rerank_name}")

            log_content += "- Sending a message ([`Hello`], `Hi`)<br>"
            yield log_content
            _ = rerank([Document(content="Hello")], "Hi")

            log_content += (
                "<mark style='background: green; color: white'>- Connection success. "
                "</mark><br>"
            )
            yield log_content

            gr.Info(f"Reranking model {selected_rerank_name} connected successfully")
        except Exception as e:
            print(e)
            log_content += (
                f"<mark style='color: yellow; background: red'>- Connection failed. "
                f"Got error:\n {str(e)}</mark>"
            )
            yield log_content

        return log_content

    def save_rerank(self, selected_rerank_name, edit_name, default, spec):
        """Save the edited model, optionally renaming it.

        Returns:
            str: the name under which the model is stored after saving

        Raises:
            gr.Error: if the spec cannot be parsed or the update fails
        """
        try:
            new_name = edit_name.strip()
            spec = yaml.load(spec, Loader=YAMLNoDateSafeLoader)
            spec["__type__"] = reranking_models_manager.info()[selected_rerank_name][
                "spec"
            ]["__type__"]
            reranking_models_manager.update(
                selected_rerank_name, spec=spec, default=default, new_name=new_name
            )
            # An empty name box means "no rename" for the manager, so keep
            # reporting (and selecting) the current name in that case.
            final_name = new_name or selected_rerank_name
            gr.Info(f'Reranking model "{final_name}" saved successfully')
            return final_name
        except ValueError as e:
            raise gr.Error(str(e))
        except Exception as e:
            raise gr.Error(
                f'Failed to save Reranking model "{selected_rerank_name}": {e}'
            )

    def delete_rerank(self, selected_rerank_name):
        """Delete the selected model.

        Returns:
            str: "" on success (which closes the edit panel), or the unchanged
            name when the deletion failed.
        """
        try:
            reranking_models_manager.delete(selected_rerank_name)
        except Exception as e:
            # `gr.Error` only shows up when raised; use a warning toast so the
            # failure is visible while the selection is still returned.
            gr.Warning(
                f'Failed to delete Reranking model "{selected_rerank_name}": {e}'
            )
            return selected_rerank_name

        return ""


================================================
FILE: libs/ktem/ktem/settings.py
================================================
from typing import Any

from pydantic import BaseModel, Field


class SettingItem(BaseModel):
    """Represent a setting item

    Args:
        name: the name of the setting item
        value: the default value of the setting item
        choices: the list of choices of the setting item, if any
        metadata: the metadata of the setting item
        component: the expected UI component to render the setting
            (defaults to "text")
        special_type: optional marker string, empty by default; its meaning
            is not defined in this module (presumably interpreted by the UI
            layer, to be confirmed against callers)
    """

    name: str
    value: Any
    choices: list = Field(default_factory=list)
    metadata: dict = Field(default_factory=dict)
    component: str = "text"
    special_type: str = ""


class BaseSettingGroup(BaseModel):
    """A group of setting items, optionally holding nested option groups."""

    settings: dict[str, "SettingItem"] = Field(default_factory=dict)
    options: dict[str, "BaseSettingGroup"] = Field(default_factory=dict)

    def _get_options(self) -> dict:
        """Return flattened option values; the base group exposes none."""
        return dict()

    def finalize(self):
        """Finalize the setting group"""

    def flatten(self) -> dict:
        """Render the setting group into value"""
        flat = {name: item.value for name, item in self.settings.items()}
        for opt_key, opt_value in self._get_options().items():
            flat[f"options.{opt_key}"] = opt_value
        return flat

    def get_setting_item(self, path: str) -> SettingItem:
        """Get the item based on dot notation"""
        path = path.strip(".")
        head, dot, remainder = path.partition(".")
        if not dot:
            # plain key: look it up directly in this group's settings
            return self.settings[path]

        if head != "options":
            raise ValueError(f"Invalid key {path}. Should starts with `options.*`")

        option_id, sub_path = remainder.split(".", 1)
        return self.options[option_id].get_setting_item(sub_path)

    def __bool__(self):
        return bool(self.settings or self.options)


class SettingReasoningGroup(BaseSettingGroup):
    """Setting group for reasoning, with one option group per pipeline."""

    def _get_options(self) -> dict:
        """Flatten every option group, prefixing keys with the option name."""
        return {
            f"{option_name}.{key}": value
            for option_name, option_group in self.options.items()
            for key, value in option_group.flatten().items()
        }

    def finalize(self):
        """Finalize the setting"""
        option_names = [*self.options]
        if not option_names:
            return

        # expose the registered options as the choices of the `use` setting
        # and select the first one
        use_item = self.settings["use"]
        use_item.choices = [(name, name) for name in option_names]
        use_item.value = option_names[0]


class SettingIndexOption(BaseSettingGroup):
    """Temporarily keep it here to see if we need this setting template
    for the index component
    """

    indexing: BaseSettingGroup
    retrieval: BaseSettingGroup

    def flatten(self) -> dict:
        """Render the setting group into value

        Returns:
            dict: the flattened `indexing` and `retrieval` groups, with keys
            prefixed by `indexing.` and `retrieval.` respectively.
        """
        output = {}
        # `flatten()` returns a dict, so iterate over `.items()` to get the
        # (key, value) pairs rather than unpacking the key strings.
        for key, value in self.indexing.flatten().items():
            output[f"indexing.{key}"] = value

        for key, value in self.retrieval.flatten().items():
            output[f"retrieval.{key}"] = value

        return output

    def get_setting_item(self, path: str) -> SettingItem:
        """Get the item based on dot notation

        Raises:
            ValueError: if the path does not start with `indexing.` or
                `retrieval.`
        """
        path = path.strip(".")

        key, sub_path = path.split(".", 1)
        if key not in ["indexing", "retrieval"]:
            raise ValueError(
                f"Invalid key {path}. Should starts with `indexing.*` or `retrieval.*`"
            )

        value = getattr(self, key)
        return value.get_setting_item(sub_path)


class SettingIndexGroup(BaseSettingGroup):
    """Setting group for indices, with one option group per index."""

    def _get_options(self) -> dict:
        """Flatten every option group, prefixing keys with the option name."""
        return {
            f"{index_name}.{key}": value
            for index_name, index_group in self.options.items()
            for key, value in index_group.flatten().items()
        }


class SettingGroup(BaseModel):
    """Top-level settings, split into application, index and reasoning groups."""

    application: BaseSettingGroup = Field(default_factory=BaseSettingGroup)
    index: SettingIndexGroup = Field(default_factory=SettingIndexGroup)
    reasoning: SettingReasoningGroup = Field(default_factory=SettingReasoningGroup)

    def flatten(self) -> dict:
        """Render the setting group into value"""
        output = {}
        for key, value in self.application.flatten().items():
            output[f"application.{key}"] = value

        for key, value in self.index.flatten().items():
            output[f"index.{key}"] = value

        for key, value in self.reasoning.flatten().items():
            output[f"reasoning.{key}"] = value

        return output

    def get_setting_item(self, path: str) -> SettingItem:
        """Get the item based on dot notation

        Args:
            path: dotted path starting with `application.`, `index.` or
                `reasoning.`

        Raises:
            ValueError: if the path does not start with one of the three
                group names followed by a sub-path
        """
        path = path.strip(".")

        key, _, sub_path = path.partition(".")
        if key not in ["application", "index", "reasoning"] or not sub_path:
            raise ValueError(
                f"Invalid key {path}. Should starts with `application.*`, "
                "`index.*` or `reasoning.*`"
            )

        value = getattr(self, key)
        return value.get_setting_item(sub_path)


================================================
FILE: libs/ktem/ktem/utils/__init__.py
================================================
from .conversation import get_file_names_regex, get_urls
from .lang import SUPPORTED_LANGUAGE_MAP

__all__ = ["SUPPORTED_LANGUAGE_MAP", "get_file_names_regex", "get_urls"]


================================================
FILE: libs/ktem/ktem/utils/commands.py
================================================
# Command keyword for web search (presumably typed by the user in chat to
# trigger a web search; confirm against the callers).
WEB_SEARCH_COMMAND = "web"


================================================
FILE: libs/ktem/ktem/utils/conversation.py
================================================
import re


def sync_retrieval_n_message(
    messages: list[list[str]],
    retrievals: list[str],
) -> list[str]:
    """Align the retrieval history with the message history.

    Extra retrievals beyond the number of messages are dropped; missing ones
    are filled with empty strings, so the result has one entry per message.
    """
    target_len = len(messages)  # include previous history
    kept = retrievals[:target_len]
    synced = kept + [""] * (target_len - len(kept))

    assert len(synced) == target_len

    return synced


def get_file_names_regex(input_str: str) -> tuple[list[str], str]:
    """Extract file names written as @"filename" from the input string.

    Returns:
        The list of quoted file names (without the `@` and the quotes) and
        the input string with those mentions removed and then stripped.
    """
    mention = re.compile(r'@"([^"]*)"')
    file_names = mention.findall(input_str)
    remaining = mention.sub("", input_str).strip()

    return file_names, remaining


def get_urls(input_str: str) -> tuple[list[str], str]:
    """Extract http/https URLs from the input string.

    Returns:
        The list of URLs found (each one runs until the next whitespace) and
        the input string with those URLs removed and then stripped.
    """
    url_re = re.compile(r"https?://[^\s]+")
    urls = url_re.findall(input_str)
    remaining = url_re.sub("", input_str).strip()

    return urls, remaining


# Manual smoke check: three messages with no retrievals prints three
# empty-string placeholders.
if __name__ == "__main__":
    print(sync_retrieval_n_message([[""], [""], [""]], []))


================================================
FILE: libs/ktem/ktem/utils/file.py
================================================
import yaml


class YAMLNoDateSafeLoader(yaml.SafeLoader):
    """Load datetime as strings, not dates"""

    @classmethod
    def remove_implicit_resolver(cls, tag_to_remove):
        """Drop every implicit resolver registered for a given tag

        Args:
            tag_to_remove (str): YAML tag to remove
        """
        # Give this class its own resolver table on first use, so the table
        # inherited from the parent loader is left untouched.
        if "yaml_implicit_resolvers" not in vars(cls):
            cls.yaml_implicit_resolvers = dict(cls.yaml_implicit_resolvers)

        resolvers = cls.yaml_implicit_resolvers
        for first_char in list(resolvers):
            resolvers[first_char] = [
                (tag, pattern)
                for tag, pattern in resolvers[first_char]
                if tag != tag_to_remove
            ]


# Remove the implicit resolvers registered for the YAML timestamp tag on this
# loader class, at import time.
YAMLNoDateSafeLoader.remove_implicit_resolver("tag:yaml.org,2002:timestamp")


================================================
FILE: libs/ktem/ktem/utils/generator.py
================================================
class Generator:
    """Wrap a generator and keep its return value after iteration.

    Iterating the wrapper yields every item of the wrapped generator; once it
    is exhausted, its return value is available as `self.value`.
    """

    def __init__(self, gen):
        self.gen = gen

    def __iter__(self):
        returned = yield from self.gen
        self.value = returned
        return returned


================================================
FILE: libs/ktem/ktem/utils/hf_papers.py
================================================
from datetime import datetime, timedelta

import requests
from cachetools import TTLCache, cached

# Hugging Face endpoint that fetch_papers queries for the daily paper list.
HF_API_URL = "https://huggingface.co/api/daily_papers"
# Template for an arXiv abstract page; `paper_id` is filled in with str.format.
ARXIV_URL = "https://arxiv.org/abs/{paper_id}"
# Semantic Scholar title-match search; `paper_name` is filled in with str.format.
SEMANTIC_SCHOLAR_QUERY_URL = "https://api.semanticscholar.org/graph/v1/paper/search/match?query={paper_name}"  # noqa
# Semantic Scholar recommendations endpoint (POSTed to with seed paper ids).
SEMANTIC_SCHOLAR_RECOMMEND_URL = (
    "https://api.semanticscholar.org/recommendations/v1/papers/"  # noqa
)
# Time-to-live, in seconds, of the recommendation cache.
CACHE_TIME = 60 * 60 * 6  # 6 hours


def parse_date(date_str):
    """Parse a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` string into a naive datetime.

    Raises whatever ``datetime.strptime`` raises when the string does not
    match that layout.
    """
    timestamp_layout = "%Y-%m-%dT%H:%M:%S.%fZ"
    return datetime.strptime(date_str, timestamp_layout)


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
    """Ask Semantic Scholar for papers related to one seed paper.

    Results are memoised per ``semantic_scholar_id`` in a TTL cache (up to 500
    entries, each kept for ``CACHE_TIME`` seconds).

    Args:
        semantic_scholar_id: id of the paper used as the positive example.

    Returns:
        The ``recommendedPapers`` list from the JSON response, or ``[]`` when
        the response has no such key.

    Raises:
        requests.RequestException: on connection problems or when the server
            does not answer within the timeout.
    """
    try:
        r = requests.post(
            SEMANTIC_SCHOLAR_RECOMMEND_URL,
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": 14},  # type: ignore
            # Without a timeout requests waits forever on a stalled server,
            # which would block the caller indefinitely.
            timeout=30,
        )
        return r.json()["recommendedPapers"]
    except KeyError as e:
        # The payload lacked "recommendedPapers" (e.g. an error response).
        print(e)
        return []


def filter_recommendations(recommendations, max_paper_count=5):
    """Keep only recommendations that carry an arXiv id, capped in number.

    Args:
        recommendations: iterable of dicts, each with an ``externalIds`` dict.
        max_paper_count: maximum number of entries to return.

    Returns:
        A new list with the first ``max_paper_count`` arXiv-backed entries,
        in their original order.
    """
    with_arxiv_id = []
    for paper in recommendations:
        if paper["externalIds"].get("ArXiv") is not None:
            with_arxiv_id.append(paper)

    # Slicing past the end is harmless, so no explicit length check is needed.
    return with_arxiv_id[:max_paper_count]


def format_recommendation_into_markdown(recommendations):
    """Render recommendations as a markdown bullet list of arXiv links.

    Args:
        recommendations: iterable of dicts with ``title``, ``year`` and
            ``externalIds["ArXiv"]``.

    Returns:
        An attribution line, a blank line, then one
        ``* [title](url) (year)`` bullet per recommendation.
    """
    pieces = ["(recommended by the Semantic Scholar API)\n\n"]
    for paper in recommendations:
        hub_paper_url = f"https://arxiv.org/abs/{paper['externalIds']['ArXiv']}"
        pieces.append(f"* [{paper['title']}]({hub_paper_url}) ({paper['year']})\n")

    return "".join(pieces)


def get_paper_id_from_name(paper_name):
    """Look up the Semantic Scholar paper id that best matches a title.

    Args:
        paper_name: paper title to search for.

    Returns:
        The ``paperId`` of the first entry in the response's ``data`` list, or
        ``None`` when the request fails, times out, or yields no match.
    """
    from urllib.parse import quote

    try:
        # Percent-encode the title: characters such as "&", "#" or "+" would
        # otherwise be read as URL syntax and truncate or corrupt the query.
        query_url = SEMANTIC_SCHOLAR_QUERY_URL.format(
            paper_name=quote(paper_name, safe="")
        )
        # The timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(query_url, timeout=30)
        response.raise_for_status()
        items = response.json()
        paper_id = items.get("data", [])[0].get("paperId")
    except Exception as e:
        # Best-effort lookup: any failure is reported and mapped to None.
        print(e)
        return None

    return paper_id


def get_recommended_papers(paper_name):
    """Build a markdown list of arXiv papers related to ``paper_name``.

    Returns:
        The formatted recommendation list, or an empty string when no
        Semantic Scholar id could be found for the title.
    """
    paper_id = get_paper_id_from_name(paper_name)
    if paper_id is None:
        return ""

    # id -> raw recommendations -> arXiv-only subset -> markdown
    raw_recommendations = get_recommendations_from_semantic_scholar(paper_id)
    arxiv_recommendations = filter_recommendations(raw_recommendations)
    return format_recommendation_into_markdown(arxiv_recommendations)


def fetch_papers(top_n=5):
    """Fetch the most upvoted Hugging Face daily papers of the last 3 days.

    Args:
        top_n: maximum number of papers to return.

    Returns:
        Up to ``top_n`` dicts with ``title``, ``url`` (arXiv abstract page) and
        ``upvotes``, sorted by upvotes in descending order. Returns ``[]`` when
        the request or the parsing of its payload fails.
    """
    from datetime import timezone

    try:
        # The timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(f"{HF_API_URL}?limit=100", timeout=30)
        response.raise_for_status()
        items = response.json()

        # `publishedAt` is a UTC ("...Z") timestamp that parse_date turns into
        # a naive datetime, so the cutoff must be naive UTC as well. Using the
        # local wall clock would shift the window by the UTC offset.
        utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
        three_days_ago = utc_now - timedelta(days=3)

        # Filter items from the last 3 days
        recent_items = [
            item
            for item in items
            if parse_date(item.get("publishedAt")) >= three_days_ago
        ]

        recent_items.sort(
            key=lambda x: x.get("paper", {}).get("upvotes", 0), reverse=True
        )
        output_items = [
            {
                "title": item.get("paper", {}).get("title"),
                "url": ARXIV_URL.format(paper_id=item.get("paper", {}).get("id")),
                "upvotes": item.get("paper", {}).get("upvotes"),
            }
            for item in recent_items[:top_n]
        ]
    except Exception as e:
        # Best-effort feed: any failure is reported and yields an empty list.
        print(e)
        return []

    return output_items


================================================
FILE: libs/ktem/ktem/utils/lang.py
================================================
# Maps a two-letter language code to the language's English name.
# NOTE(review): the codes look like ISO 639-1 — confirm before relying on it.
SUPPORTED_LANGUAGE_MAP = {
    "en": "English",
    "ja": "Japanese",
    "vi": "Vietnamese",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "zh": "Chinese",
    "ru": "Russian",
    "ar": "Arabic",
    "pt": "Portuguese",
    "hi": "Hindi",
    "bn": "Bengali",
    "pa": "Punjabi",
    "ko": "Korean",
    "it": "Italian",
    "nl": "Dutch",
    "tr": "Turkish",
    "pl": "Polish",
    "uk": "Ukrainian",
    "ro": "Romanian",
    "el": "Greek",
    "hu": "Hungarian",
    "sv": "Swedish",
    "cs": "Czech",
    "fi": "Finnish",
    "da": "Danish",
    "no": "Norwegian",
    "he": "Hebrew",
    "th": "Thai",
    "id": "Indonesian",
    "ms": "Malay",
}


================================================
FILE: libs/ktem/ktem/utils/plantuml.py
================================================
#!/usr/bin/env python

from __future__ import print_function

import base64
import string
from zlib import compress

import httplib2
import six  # type: ignore

# Pick the `maketrans` that matches the interpreter: the `string` module's on
# Python 2, `bytes.maketrans` otherwise (the table is built from bytes below).
if six.PY2:
    from string import maketrans
else:
    maketrans = bytes.maketrans


# Target alphabet: digits, upper-case, lower-case, then "-" and "_".
plantuml_alphabet = (
    string.digits + string.ascii_uppercase + string.ascii_lowercase + "-_"
)
# Source alphabet: upper-case, lower-case, digits, then "+" and "/".
base64_alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/"
# Translation table sending each base64 character to the PlantUML character
# at the same position in its alphabet.
b64_to_plantuml = maketrans(
    base64_alphabet.encode("utf-8"), plantuml_alphabet.encode("utf-8")
)


class PlantUMLError(Exception):
    """Raised when processing a PlantUML request fails."""


class PlantUMLConnectionError(PlantUMLError):
    """Raised when connecting to, or talking with, the PlantUML server fails."""


class PlantUMLHTTPError(PlantUMLConnectionError):
    """Raised when the PlantUML server answers with an HTTP error.

    The offending ``response`` and its ``content`` are kept on the instance.
    """

    def __init__(self, response, content, *args, **kwdargs):
        self.response = response
        self.content = content

        status_line = "%d: %s" % (response.status, response.reason)
        # Only fill in `message` when nothing truthy is already there.
        existing_message = getattr(self, "message", None)
        if not existing_message:
            self.message = status_line

        super().__init__(status_line, *args, **kwdargs)


def deflate_and_encode(plantuml_text):
    """Compress PlantUML markup and encode it for use in a server URL.

    The text is UTF-8 encoded and zlib-compressed, the first 2 and last 4
    bytes of the compressed stream are dropped, and the rest is base64-encoded
    and translated into the PlantUML alphabet.
    """
    zlib_stream = compress(plantuml_text.encode("utf-8"))
    trimmed_stream = zlib_stream[2:-4]
    base64_bytes = base64.b64encode(trimmed_stream)
    return base64_bytes.translate(b64_to_plantuml).decode("utf-8")


class PlantUML(object):
    """Connection to a PlantUML server with optional authentication.

    All parameters are optional.

    :param str url: URL to the PlantUML server image CGI. defaults to
                    http://www.plantuml.com/plantuml/svg/
    :param dict request_opts: Extra options to be passed off to the
                    httplib2.Http().request() call. Defaults to no options.
    """

    def __init__(
        self, url="http://www.plantuml.com/plantuml/svg/", request_opts=None
    ):
        self.HttpLib2Error = httplib2.HttpLib2Error
        self.http = httplib2.Http()

        self.url = url
        # A literal `{}` default would be one dict shared by every instance
        # created without options; build a fresh one per instance instead.
        self.request_opts = {} if request_opts is None else request_opts

    def get_url(self, plantuml_text):
        """Return the server URL for the image.
        You can use this URL in an IMG HTML tag.

        :param str plantuml_text: The plantuml markup to render
        :returns: the plantuml server image URL
        """
        return self.url + deflate_and_encode(plantuml_text)

    def process(self, plantuml_text):
        """Render the plantuml text and return it as an HTML snippet.

        The server response is decoded as UTF-8, the ``<svg`` tag is given the
        id ``mindmap``, and the markup is wrapped in a fixed-height
        ``mindmap-wrapper`` div.

        :param str plantuml_text: The plantuml markup to render
        :returns: the SVG markup wrapped in a div, as a string
        :raises PlantUMLConnectionError: if the HTTP request itself fails
        :raises PlantUMLHTTPError: if the server answers with a non-200 status
        """
        url = self.get_url(plantuml_text)
        try:
            response, content = self.http.request(url, **self.request_opts)
        except self.HttpLib2Error as e:
            # Chain explicitly so the underlying transport error stays visible.
            raise PlantUMLConnectionError(e) from e
        if response.status != 200:
            raise PlantUMLHTTPError(response, content)

        svg_content = content.decode("utf-8")
        svg_content = svg_content.replace("<svg ", "<svg id='mindmap' ")

        # wrap in fixed height div
        svg_content = (
            "<div id='mindmap-wrapper' "
            "style='height: 400px; overflow: hidden;'>"
            f"{svg_content}</div>"
        )

        return svg_content


================================================
FILE: libs/ktem/ktem/utils/rate_limit.py
================================================
from collections import defaultdict
from datetime import datetime, timedelta

import gradio as gr
from decouple import config

# In-memory store for rate limiting (for demonstration purposes).
# Layout: limit_type -> user id -> {"count": int, "reset_time": datetime}.
rate_limit_store: dict[str, dict] = defaultdict(dict)

# Rate limit configuration: at most RATE_LIMIT calls per user and limit type
# within one RATE_LIMIT_PERIOD window. RATE_LIMIT comes from the "RATE_LIMIT"
# setting (default 20).
RATE_LIMIT = config("RATE_LIMIT", default=20, cast=int)
RATE_LIMIT_PERIOD = timedelta(hours=24)


def check_rate_limit(limit_type: str, request: gr.Request):
    """Count one use of ``limit_type`` for the signed-in user, enforcing the cap.

    Args:
        limit_type: name of the feature being rate limited.
        request: the Gradio request used to identify the user.

    Returns:
        The user's email, which serves as the user id.

    Raises:
        ValueError: if there is no request, no signed-in user can be
            determined, or the user has used up the current window.
    """
    if request is None:
        raise ValueError("This feature is not available")

    user_id = None
    try:
        import gradiologin as grlogin

        user = grlogin.get_user(request)
        user_id = user.get("email") if user else None
    except (ImportError, AssertionError):
        # Login support is missing or rejected the request: treat the caller
        # as anonymous and let the check below refuse it.
        pass

    if not user_id:
        raise ValueError("Please sign-in to use this feature")

    now = datetime.now()
    per_type_usage = rate_limit_store[limit_type]
    usage = per_type_usage.get(user_id)

    # First call ever, or the previous window has elapsed: open a new window.
    if usage is None or now >= usage["reset_time"]:
        usage = {"count": 0, "reset_time": now + RATE_LIMIT_PERIOD}

    if usage["count"] >= RATE_LIMIT:
        raise ValueError("Rate limit exceeded. Please try again later.")

    usage["count"] += 1
    per_type_usage[user_id] = usage

    return user_id


================================================
FILE: libs/ktem/ktem/utils/render.py
================================================
import os

import markdown
from fast_langdetect import detect

from kotaemon.base import RetrievedDocument

# Prefix put in front of "/file=<path>" in preview links; taken from the
# GR_FILE_ROOT_PATH environment variable, empty when it is not set.
BASE_PATH = os.environ.get("GR_FILE_ROOT_PATH", "")


def is_close(val1, val2, tolerance=1e-9):
    """Return True when ``val1`` and ``val2`` differ by at most ``tolerance``."""
    gap = abs(val1 - val2)
    return gap <= tolerance


def replace_mardown_header(text: str) -> str:
    """Turn markdown heading lines into bold text and blank out "==" rules.

    A line starting with ``#`` loses its leading ``#`` run (and a trailing
    closing ``#`` run, when one is set off by a space) and is wrapped in
    ``<strong>`` tags. ``#`` characters inside the heading text are kept.
    A line starting with ``==`` is replaced by an empty line.

    Args:
        text: markdown text.

    Returns:
        The rewritten text, with lines joined by ``"\\n"``.
    """
    newlines = []
    for line in text.splitlines():
        if line.startswith("#"):
            # Remove only the heading markers, not every "#" on the line, so a
            # heading such as "## Issue #42" keeps its "#42".
            heading = line.lstrip("#")
            without_closing = heading.rstrip().rstrip("#")
            if without_closing != heading.rstrip() and without_closing.endswith(" "):
                # "## Title ##": the trailing run is a closing sequence.
                heading = without_closing
            line = "<strong>" + heading + "</strong>"
        if line.startswith("=="):
            line = ""
        newlines.append(line)

    return "\n".join(newlines)


def get_header(doc: RetrievedDocument) -> str:
    """Build the display header for a document: optional page, then file name.

    Uses ``<evidence>`` as the name when the metadata has no ``file_name``.
    """
    metadata = doc.metadata
    pieces = []
    if "page_label" in metadata:
        pieces.append(f" [Page {metadata['page_label']}]")
    pieces.append(f" {metadata.get('file_name', '<evidence>')}")

    return "".join(pieces).strip()


class Render:
    """Default text rendering into HTML for the UI"""

    @staticmethod
    def collapsible(header, content, open: bool = False) -> str:
        """Render an HTML friendly collapsible section

        Args:
            header: HTML placed inside the ``<summary>`` element.
            content: HTML shown when the section is expanded.
            open: whether the section starts expanded.

        Returns:
            A ``<details class='evidence'>`` element followed by ``<br>``.
        """
        o = " open" if open else ""
        return (
            f"<details class='evidence' {o}><summary>"
            f"{header}</summary>{content}"
            "</details><br>"
        )

    @staticmethod
    def table(text: str) -> str:
        """Render table from markdown format into HTML

        Heading lines are first turned into bold text (see
        ``replace_mardown_header``), then the text is converted with the
        tables and fenced-code markdown extensions.
        """
        text = replace_mardown_header(text)
        return markdown.markdown(
            text,
            extensions=[
                "markdown.extensions.tables",
                "markdown.extensions.fenced_code",
            ],
        )

    @staticmethod
    def table_preserve_linebreaks(text: str) -> str:
        """Render table from markdown format into HTML, keeping line breaks

        Unlike ``table``, headings are left untouched and every newline in the
        generated HTML is replaced by ``<br>``.
        """
        return markdown.markdown(
            text,
            extensions=[
                "markdown.extensions.tables",
                "markdown.extensions.fenced_code",
            ],
        ).replace("\n", "<br>")

    @staticmethod
    def preview(
        html_content: str,
        doc: RetrievedDocument,
        highlight_text: str | None = None,
    ) -> str:
        """Append a "[Preview]" PDF link to ``html_content`` when possible.

        The link is only added when the document's ``file_path`` exists on
        disk, its ``file_type`` is ``application/pdf`` and its ``page_label``
        is a non-negative integer. Otherwise ``html_content`` is returned
        unchanged.

        Args:
            html_content: HTML to which the link is appended.
            doc: document whose metadata and content drive the link.
            highlight_text: text to search for in the viewer. When empty, the
                document content (newlines and quotes removed) is used.

        Returns:
            ``html_content``, optionally followed by the preview anchor.
        """
        text = doc.content
        pdf_path = doc.metadata.get("file_path", "")

        if not os.path.isfile(pdf_path):
            print(f"pdf-path: {pdf_path} does not exist")
            return html_content

        is_pdf = doc.metadata.get("file_type", "") == "application/pdf"
        if not is_pdf:
            print("Document is not pdf")
            return html_content

        # Parsed only for PDFs, and guarded: a non-numeric label (for example
        # a roman numeral) must skip the preview, not crash the rendering.
        try:
            page_idx = int(doc.metadata.get("page_label", 1))
        except (TypeError, ValueError):
            page_idx = -1

        if page_idx < 0:
            print("Fail to extract page number")
            return html_content

        if not highlight_text:
            phrase = "false"
            try:
                lang = detect(text.replace("\n", " "))["lang"]
                # Phrase search stays off for Japanese and Chinese. Chinese is
                # coded "zh" elsewhere in this project; "cn" is kept for
                # backward compatibility.
                if lang not in ["ja", "zh", "cn"]:
                    phrase = "true"

                highlight_text = (
                    text.replace("\n", "").replace('"', "").replace("'", "")
                )
            except Exception as e:
                # Language detection failed: fall back to the raw content.
                print(e)
                highlight_text = text
        else:
            phrase = "true"

        # NOTE(review): the attribute values below are interpolated without
        # HTML escaping — confirm the inputs can never contain a double quote.
        return f"""
        {html_content}
        <a href="#" class="pdf-link" data-src="{BASE_PATH}/file={pdf_path}" data-page="{page_idx}" data-search="{highlight_text}" data-phrase="{phrase}">
            [Preview]
        </a>
        """  # noqa

    @staticmethod
    def highlight(text: str, elem_id: str | None = None) -> str:
        """Highlight text

        Wraps ``text`` in ``<mark>``; when ``elem_id`` is given the element
        gets the id ``mark-<elem_id>``.
        """
        id_text = f" id='mark-{elem_id}'" if elem_id else ""
        return f"<mark{id_text}>{text}</mark>"

    @staticmethod
    def image(url: str, text: str = "") -> str:
        """Render an image

        Returns an ``<img>`` tag followed by ``<br>``; with a non-empty
        ``text`` the image and a caption paragraph are wrapped in a
        ``<figure>``.
        """
        img = f'<img src="{url}"><br>'
        if text:
            caption = f"<p>{text}</p>"
            return f"<figure>{img}{caption}</figure><br>"
        return img

    @staticmethod
    def collapsible_with_header(
        doc: RetrievedDocument,
        open_collapsible: bool = False,
    ) -> str:
        """Render a document as a collapsible section with a preview header.

        The body depends on the metadata ``type``: ``image`` renders the
        ``image_origin`` picture, ``table_raw`` keeps line breaks, anything
        else goes through ``Render.table``.
        """
        header = f"<i>{get_header(doc)}</i>"
        if doc.metadata.get("type", "") == "image":
            doc_content = Render.image(url=doc.metadata["image_origin"], text=doc.text)
        elif doc.metadata.get("type", "") == "table_raw":
            doc_content = Render.table_preserve_linebreaks(doc.text)
        else:
            doc_content = Render.table(doc.text)

        return Render.collapsible(
            header=Render.preview(header, doc),
            content=doc_content,
            open=open_collapsible,
        )

    @staticmethod
    def collapsible_with_header_score(
        doc: RetrievedDocument,
        override_text: str | None = None,
        highlight_text: str | None = None,
        open_collapsible: bool = False,
    ) -> str:
        """Format the retrieval score and the document

        Args:
            doc: the retrieved document to render.
            override_text: text shown instead of ``doc.text`` (not applied to
                ``table_raw`` documents, which always render ``doc.text``).
            highlight_text: forwarded to ``Render.preview``.
            open_collapsible: whether the outer section starts expanded.

        Returns:
            A collapsible section holding a nested score section and the
            rendered document content.
        """
        # score from doc_store (Elasticsearch)
        if is_close(doc.score, -1.0):
            # A score of -1 marks a full-text search hit: no vector score.
            vectorstore_score = ""
            text_search_str = " (full-text search)<br>"
        else:
            vectorstore_score = str(round(doc.score, 2))
            text_search_str = "<br>"

        llm_reranking_score = (
            round(doc.metadata["llm_trulens_score"], 2)
            if doc.metadata.get("llm_trulens_score") is not None
            else 0.0
        )
        reranking_score = (
            round(doc.metadata["reranking_score"], 2)
            if doc.metadata.get("reranking_score") is not None
            else 0.0
        )
        item_type_prefix = doc.metadata.get("type", "")
        item_type_prefix = item_type_prefix.capitalize()
        if item_type_prefix:
            item_type_prefix += " from "

        # Raw item types (e.g. "table_raw") get no type prefix in the header.
        if "raw" in item_type_prefix:
            item_type_prefix = ""

        # The LLM score takes precedence over the reranker score.
        if llm_reranking_score > 0:
            relevant_score = llm_reranking_score
        elif reranking_score > 0:
            relevant_score = reranking_score
        else:
            relevant_score = 0.0

        rendered_score = Render.collapsible(
            header=f"<b>&emsp;Relevance score</b>: {relevant_score:.1f}",
            content="<b>&emsp;&emsp;Vectorstore score:</b>"
            f" {vectorstore_score}"
            f"{text_search_str}"
            "<b>&emsp;&emsp;LLM relevant score:</b>"
            f" {llm_reranking_score}<br>"
            "<b>&emsp;&emsp;Reranking score:</b>"
            f" {reranking_score}<br>",
        )

        text = doc.text if not override_text else override_text
        if doc.metadata.get("type", "") == "image":
            rendered_doc_content = Render.image(
                url=doc.metadata["image_origin"],
                text=text,
            )
        elif doc.metadata.get("type", "") == "table_raw":
            rendered_doc_content = Render.table_preserve_linebreaks(doc.text)
        else:
            rendered_doc_content = Render.table(text)

        rendered_header = Render.preview(
            f"<i>{item_type_prefix}{get_header(doc)}</i>"
            f" [score: {llm_reranking_score}]",
            doc,
            highlight_text=highlight_text,
        )
        rendered_doc_content = (
            f"<div class='evidence-content'>{rendered_doc_content}</div>"
        )

        return Render.collapsible(
            header=rendered_header,
            content=rendered_score + rendered_doc_content,
            open=open_collapsible,
        )


================================================
FILE: libs/ktem/ktem/utils/visualize_cited.py
================================================
"""
This module aims to project high-dimensional embeddings
into a lower-dimensional space for visualization.

Refs:
1. [RAGxplorer](https://github.com/gabrielchua/RAGxplorer)
2. [RAGVizExpander](https://github.com/KKenny0/RAGVizExpander)
"""
from typing import List, Tuple

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import umap

from kotaemon.base import BaseComponent
from kotaemon.embeddings import BaseEmbeddings

# Marker styling per point category, keyed by the value of the "category"
# column; categories not listed here fall back to a default in the plotting
# code.
VISUALIZATION_SETTINGS = {
    "Original Query": {"color": "red", "opacity": 1, "symbol": "cross", "size": 15},
    "Retrieved": {"color": "green", "opacity": 1, "symbol": "circle", "size": 10},
    "Chunks": {"color": "blue", "opacity": 0.4, "symbol": "circle", "size": 10},
    "Sub-Questions": {"color": "purple", "opacity": 1, "symbol": "star", "size": 15},
}


class CreateCitationVizPipeline(BaseComponent):
    """Build a 2-D scatter plot of a query and its retrieved contexts"""

    embedding: BaseEmbeddings
    projector: umap.UMAP = None

    def _set_up_umap(self, embeddings: np.ndarray):
        """Fit a default-configured UMAP model on ``embeddings`` and return it."""
        return umap.UMAP().fit(embeddings)

    def _project_embeddings(self, embeddings, umap_transform) -> np.ndarray:
        """Project embeddings one at a time into an ``(n, 2)`` array."""
        projected = np.empty((len(embeddings), 2))
        for row, vector in enumerate(embeddings):
            projected[row] = umap_transform.transform([vector])
        return projected

    def _get_projections(self, embeddings, umap_transform):
        """Return the projected x and y coordinates as two separate arrays."""
        coordinates = self._project_embeddings(embeddings, umap_transform)
        return coordinates[:, 0], coordinates[:, 1]

    def _prepare_projection_df(
        self,
        document_projections: Tuple[np.ndarray, np.ndarray],
        document_text: List[str],
        plot_size: int = 3,
    ) -> pd.DataFrame:
        """Assemble the plotting DataFrame for the retrieved documents.

        Args:
            document_projections (Tuple[np.ndarray, np.ndarray]):
                X and Y coordinates of the projected documents.
            document_text (List[str]): Text of each document.
            plot_size (int): Value stored in the ``size`` column.

        Returns:
            pd.DataFrame: one row per document with its coordinates, raw text,
            hover text, size and the category ``"Retrieved"``.
        """

        def _as_hover_text(wrapped_text):
            # HTML line breaks, capped at 512 characters, always with "...".
            return wrapped_text.replace("\n", "<br>")[:512] + "..."

        frame = pd.DataFrame(
            {"x": document_projections[0], "y": document_projections[1]}
        )
        frame["document"] = document_text
        wrapped_documents = frame["document"].str.wrap(50)
        frame["document_cleaned"] = wrapped_documents.apply(_as_hover_text)
        frame["size"] = plot_size
        frame["category"] = "Retrieved"
        return frame

    def _plot_embeddings(self, df: pd.DataFrame) -> go.Figure:
        """
        Draw one scatter trace per category found in ``df``.

        Args:
            df (pd.DataFrame): rows with ``x``, ``y``, ``category`` and
                ``document_cleaned`` (used as hover text).

        Returns:
            go.Figure: the assembled Plotly figure.
        """
        fallback_style = {"color": "grey", "opacity": 1, "symbol": "circle", "size": 10}
        fig = go.Figure()

        for category in df["category"].unique():
            rows = df[df["category"] == category]
            style = VISUALIZATION_SETTINGS.get(category, fallback_style)
            marker = {
                "color": style["color"],
                "opacity": style["opacity"],
                "symbol": style["symbol"],
                "size": style["size"],
                "line_width": 0,
            }
            trace = go.Scatter(
                x=rows["x"],
                y=rows["y"],
                mode="markers",
                name=category,
                marker=marker,
                hoverinfo="text",
                text=rows["document_cleaned"],
            )
            fig.add_trace(trace)

        legend = {
            "y": 100,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
            "orientation": "h",
        }
        fig.update_layout(height=500, legend=legend)
        return fig

    def run(self, context: List[str], question: str):
        """Embed the contexts and the question, project them and plot them.

        The UMAP model is fitted on the context embeddings only and stored in
        ``self.projector``; the question is projected with that fitted model.

        Args:
            context (List[str]): texts of the retrieved contexts.
            question (str): the user query.

        Returns:
            The Plotly figure with the contexts and the query.
        """
        embedded_contexts = self.embedding(context)
        context_vectors = np.array([item.embedding for item in embedded_contexts])

        self.projector = self._set_up_umap(embeddings=context_vectors)

        embedded_question = self.embedding(question)
        query_x, query_y = self._get_projections(
            embeddings=[embedded_question[0].embedding],
            umap_transform=self.projector,
        )
        query_frame = pd.DataFrame(
            {
                "x": [query_x[0]],
                "y": [query_y[0]],
                "document_cleaned": question,
                "category": "Original Query",
                "size": 5,
            }
        )

        context_xy = self._get_projections(
            embeddings=context_vectors, umap_transform=self.projector
        )
        context_frame = self._prepare_projection_df(
            document_projections=context_xy, document_text=context
        )

        combined_frame = pd.concat([context_frame, query_frame], axis=0)
        return self._plot_embeddings(combined_frame)


================================================
FILE: libs/ktem/ktem_tests/__init__.py
================================================


================================================
FILE: libs/ktem/ktem_tests/resources/embedding_openai.json
================================================
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "index": 0,
      "embedding": [
        0.006555966101586819,
        0.003670461941510439,
        -0.011642491444945335,
        -0.026776473969221115,
        -0.012383491732180119,
        -0.0014341175556182861,
        -0.013375678099691868,
        0.009356695227324963,
        -0.006364436354488134,
        -0.0294390507042408,
        0.023950627073645592,
        0.0029859787318855524,
        -0.023234745487570763,
        -0.009205983020365238,
        0.006744355894625187,
        0.0011790062999352813,
        0.02607315219938755,
        -0.018437083810567856,
        0.008904559537768364,
        0.009620440192520618,
        -0.01306169480085373,
        -0.0011358336778357625,
        0.007253008428961039,
        0.00875384733080864,
        -0.012710033915936947,
        0.0037206991109997034,
        0.005419347435235977,
        -0.017243949696421623,
        0.036246202886104584,
        -0.0266759991645813,
        0.012647237628698349,
        -0.008552898652851582,
        -0.00762350857257843,
        -0.012546762824058533,
        0.007083457428961992,
        -0.014078999869525433,
        0.0048761568032205105,
        -0.013689660467207432,
        0.018211016431450844,
        -0.014367864467203617,
        0.008307991549372673,
        0.006022194866091013,
        0.005457025486975908,
        -0.006320478860288858,
        -0.03609549254179001,
        0.012986338697373867,
        0.00017592862423043698,
        -0.016427593305706978,
        -0.004734864458441734,
        0.0257717277854681,
        0.01944182999432087,
        -0.00211938563734293,
        -0.0147195253521204,
        0.01332544069737196,
        -0.017670966684818268,
        0.0013956546317785978,
        -0.04024006798863411,
        0.024754423648118973,
        0.032051388174295425,
        -0.018713390454649925,
        0.011265711858868599,
        0.012370931915938854,
        -0.013576626777648926,
        0.0040974789299070835,
        -0.002153923735022545,
        0.009915584698319435,
        0.006637601647526026,
        0.011353626847267151,
        -0.019919084385037422,
        0.030142372474074364,
        0.01966789737343788,
        -0.008213796652853489,
        -0.004213652573525906,
        -0.009162025526165962,
        0.012446288019418716,
        0.008979915641248226,
        -0.02098662778735161,
        0.003113141981884837,
        0.0018838982796296477,
        -0.0008289152756333351,
        0.02722861059010029,
        -0.03355850651860237,
        -0.004408321809023619,
        0.017859356477856636,
        0.019893966615200043,
        -0.0014137086691334844,
        -0.0021602034103125334,
        0.017959831282496452,
        -0.0114603815600276,
        -0.020069796591997147,
        0.007604669313877821,
        0.001281835837289691,
        0.01132222916930914,
        0.006668999791145325,
        -0.01743233948945999,
        0.0048761568032205105,
        0.0016923686489462852,
        0.02356128767132759,
        -0.005796127021312714,
        -0.041470881551504135,
        -0.01292354241013527,
        -0.005824385676532984,
        -0.010700542479753494,
        -0.017005322501063347,
        -0.007950050756335258,
        0.011560855433344841,
        -0.005485283676534891,
        -0.0002590360236354172,
        0.028434304520487785,
        0.007447678130120039,
        -0.026198744773864746,
        0.009306457825005054,
        -0.00500488979741931,
        -0.03107176162302494,
        -0.0007555217016488314,
        -0.008056805469095707,
        0.007479076273739338,
        0.003978165332227945,
        -0.005984516814351082,
        -0.02778122015297413,
        0.015272135846316814,
        0.029338575899600983,
        0.010964288376271725,
        -0.023548727855086327,
        0.007014381233602762,
        0.0014137086691334844,
        -0.03380969539284706,
        -0.016741575673222542,
        -0.005230957642197609,
        -0.004794521257281303,
        0.0315992534160614,
        0.0041100382804870605,
        0.010644025169312954,
        -0.0014294078573584557,
        -0.035367049276828766,
        0.0246916264295578,
        -0.007096016779541969,
        0.02096150815486908,
        -0.017030440270900726,
        -0.011906237341463566,
        0.006273381412029266,
        0.021237812936306,
        0.01757049188017845,
        -0.013224965892732143,
        -0.005092805251479149,
        0.01288586389273405,
        0.004486817866563797,
        -0.016339678317308426,
        0.008917118422687054,
        -0.015083746053278446,
        0.008377067744731903,
        0.0185626782476902,
        0.012961219996213913,
        -0.002535413019359112,
        0.0050143091939389706,
        0.016025694087147713,
        0.004618690814822912,
        0.0205470509827137,
        0.008000288158655167,
        -0.013074253685772419,
        -0.0007264782907441258,
        -0.0004474258457776159,
        0.0033125211484730244,
        -0.03084569424390793,
        0.009400652721524239,
        0.015787066891789436,
        0.02607315219938755,
        0.003689300734549761,
        -0.013940847478806973,
        -0.02604803442955017,
        -0.011454101651906967,
        0.007340923883020878,
        -0.043178949505090714,
        0.02660064399242401,
        -0.01593777909874916,
        0.00514304218813777,
        -0.001305384561419487,
        0.022820288315415382,
        -0.012697474099695683,
        -0.017269067466259003,
        -0.03024284727871418,
        0.011617372743785381,
        0.008992474526166916,
        0.025897322222590446,
        0.00629850011318922,
        -0.00014511904737446457,
        0.009890465997159481,
        -0.006364436354488134,
        0.00384629238396883,
        -0.017030440270900726,
        0.016666220501065254,
        0.03333244100213051,
        0.021011745557188988,
        0.01844964362680912,
        -0.6892555952072144,
        -0.007196491584181786,
        0.025319593027234077,
        0.02024562656879425,
        0.006245122756808996,
        0.009909304790198803,
        0.012176262214779854,
        0.027806337922811508,
        0.006518288049846888,
        0.0274546779692173,
        -0.02107454277575016,
        0.013011457398533821,
        -0.015083746053278446,
        -0.010719381272792816,
        -0.0002598209830466658,
        -0.00823263544589281,
        0.0055763390846550465,
        -0.012339534237980843,
        -0.011881118640303612,
        0.015196779742836952,
        -0.010141652077436447,
        0.03129782900214195,
        -0.022104406729340553,
        -0.014568813145160675,
        0.009928143583238125,
        0.011730406433343887,
        0.0025102945510298014,
        -0.009739753790199757,
        -0.006964143831282854,
        0.030192609876394272,
        -0.0021272350568324327,
        0.0114603815600276,
        9.394961671205238e-05,
        0.014154355973005295,
        0.06611227244138718,
        0.007610949221998453,
        -0.005463304929435253,
        0.008088203147053719,
        0.009827669709920883,
        0.028986915946006775,
        -0.019077610224485397,
        -0.022267676889896393,
        0.012559321708977222,
        -0.01662854291498661,
        -0.002763050841167569,
        0.009205983020365238,
        0.012659796513617039,
        -0.0024051100481301546,
        0.0022543983068317175,
        0.0017849936848506331,
        0.014568813145160675,
        -0.004851038102060556,
        0.010857533663511276,
        0.019504627212882042,
        -0.005343991331756115,
        0.0071588135324418545,
        0.02151411771774292,
        0.011962753720581532,
        -0.002935741562396288,
        0.004521355964243412,
        0.0005718416068702936,
        0.017168592661619186,
        -0.0016280021518468857,
        0.0038494321051985025,
        -0.022443508729338646,
        0.02381247468292713,
        -0.021313169971108437,
        0.01400364376604557,
        -0.008929678238928318,
        -0.012050669640302658,
        -0.0018368008313700557,
        -0.007994008250534534,
        -0.018047746270895004,
        -0.014744644053280354,
        0.013965966179966927,
        0.028107762336730957,
        0.01681693270802498,
        -0.0033219405449926853,
        -0.006160347256809473,
        0.01966789737343788,
        0.010926609858870506,
        0.013237525708973408,
        -0.014066440984606743,
        -0.01764584705233574,
        0.02349849045276642,
        -0.019780931994318962,
        -0.030067017301917076,
        -0.016276881098747253,
        0.010430516675114632,
        0.011479220353066921,
        0.030192609876394272,
        0.0033250804990530014,
        -0.012370931915938854,
        -0.01980605162680149,
        0.02883620373904705,
        0.0005384809337556362,
        -0.011548296548426151,
        0.003821173682808876,
        0.027303965762257576,
        0.0004238771216478199,
        0.014933033846318722,
        -0.0028038686141371727,
        0.0003752097545657307,
        0.00652456795796752,
        0.010279805399477482,
        0.006656440440565348,
        -0.00809448305517435,
        0.033156611025333405,
        0.02444044128060341,
        -0.030142372474074364,
        0.006317338906228542,
        0.004094338975846767,
        -0.04071732237935066,
        0.02121269516646862,
        0.008471262641251087,
        -0.032001152634620667,
        0.0013022447237744927,
        0.016013136133551598,
        0.026374576613307,
        -0.014694406650960445,
        0.023159390315413475,
        0.0020424597896635532,
        0.013664542697370052,
        0.0057458896189928055,
        0.0041539957746863365,
        0.005303173791617155,
        0.0003883577883243561,
        0.004342385567724705,
        -0.01690484769642353,
        0.001676669460721314,
        0.020697763189673424,
        0.005133622791618109,
        0.00046273251064121723,
        -0.005651694722473621,
        0.009808829985558987,
        0.00202676048502326,
        0.009896745905280113,
        -0.017884474247694016,
        0.00862825382500887,
        0.020308423787355423,
        -0.007994008250534534,
        0.005127343349158764,
        -0.0029388812836259604,
        -0.0006303209811449051,
        0.011818322353065014,
        -0.018399406224489212,
        -0.012025550939142704,
        0.008678491227328777,
        0.00016974708705674857,
        -0.013677101582288742,
        0.01983116939663887,
        -0.007918652147054672,
        -0.03245328739285469,
        0.021162457764148712,
        -0.008044245652854443,
        -0.0002005566784646362,
        0.0036076651886105537,
        -0.029187863692641258,
        -0.03569359332323074,
        -0.028961796313524246,
        0.0027096737176179886,
        0.016364796087145805,
        -0.0071022966876626015,
        -0.005174440797418356,
        -0.01400364376604557,
        -0.023787355050444603,
        -0.024566033855080605,
        0.011648771353065968,
        0.005579478573054075,
        -0.029012033715844154,
        0.010857533663511276,
        -0.0049420930445194244,
        -0.025093525648117065,
        -0.0024961652234196663,
        -0.0030016780365258455,
        0.010694262571632862,
        -0.010681703686714172,
        -0.015724271535873413,
        -0.003249724628403783,
        -0.01731930486857891,
        0.006876228842884302,
        -0.009601601399481297,
        -0.009821389801800251,
        0.012176262214779854,
        0.02582196518778801,
        -0.010185610502958298,
        -0.0005722341011278331,
        0.02747979573905468,
        -0.019793491810560226,
        -0.0016405613860115409,
        0.017005322501063347,
        0.0072027710266411304,
        -0.02220488153398037,
        0.0006103045307099819,
        -0.007912373170256615,
        -0.004559034015983343,
        -0.0010808866936713457,
        0.0038023346569389105,
        0.01679181307554245,
        0.01718115247786045,
        0.018713390454649925,
        0.004100618418306112,
        0.017859356477856636,
        -0.026022914797067642,
        -0.016013136133551598,
        -0.028811084106564522,
        -0.0023705719504505396,
        -0.030343322083353996,
        0.003858851734548807,
        0.013890610076487064,
        0.011969033628702164,
        -0.002329754177480936,
        -0.0014223431935533881,
        0.01764584705233574,
        0.016276881098747253,
        0.02692718617618084,
        0.0021821821574121714,
        -0.020923830568790436,
        -0.02855989895761013,
        -0.007510474417358637,
        -0.002681415295228362,
        0.008326830342411995,
        -0.01458137296140194,
        -0.0007727907504886389,
        0.01720627024769783,
        0.032604001462459564,
        0.004417741671204567,
        0.03554287925362587,
        0.003953046631067991,
        -0.0322021022439003,
        -0.02505584806203842,
        0.009231101721525192,
        0.012659796513617039,
        0.009563923813402653,
        -0.016842050477862358,
        -0.005513542331755161,
        0.0041414364241063595,
        -0.014405542053282261,
        0.02916274592280388,
        0.012163703329861164,
        -0.008502661250531673,
        0.010185610502958298,
        0.028710609301924706,
        -0.0177839994430542,
        0.00950112659484148,
        0.028384067118167877,
        0.014744644053280354,
        -0.009444610215723515,
        0.005055127199739218,
        0.02803240716457367,
        -0.0009089809027500451,
        0.0004933458403684199,
        -0.014405542053282261,
        -0.008433585055172443,
        0.0012111896649003029,
        -0.031498778611421585,
        0.0004556679050438106,
        -0.021388525143265724,
        0.019605102017521858,
        0.03569359332323074,
        0.013689660467207432,
        0.01161109283566475,
        -0.0064680506475269794,
        -0.005783567670732737,
        0.009205983020365238,
        -0.013413355685770512,
        -0.001551076304167509,
        -0.013488711789250374,
        0.003160239430144429,
        -0.005114783998578787,
        -0.012408610433340073,
        -0.003158669453114271,
        0.0335836261510849,
        -0.026274101808667183,
        0.009319016709923744,
        0.0005424057017080486,
        -0.0011326938401907682,
        -0.008841762319207191,
        -0.005205838941037655,
        0.0030566249042749405,
        -0.013036576099693775,
        -0.022443508729338646,
        -0.0017865635454654694,
        0.011830881237983704,
        0.007096016779541969,
        -0.01605081371963024,
        -0.024038542062044144,
        -0.005039427895098925,
        -0.0011122849537059665,
        -0.0032779830507934093,
        -0.008571737445890903,
        0.0041194576770067215,
        0.014292508363723755,
        -0.02342313528060913,
        0.015485644340515137,
        -0.002646877197548747,
        0.020722880959510803,
        0.00680087273940444,
        -0.009865347295999527,
        -0.00600649556145072,
        0.021036865189671516,
        0.020371221005916595,
        -0.0016107329865917563,
        -0.019291117787361145,
        0.013287762179970741,
        -0.004006423521786928,
        -0.00762350857257843,
        -0.015334932133555412,
        0.0008681631297804415,
        -0.008295431733131409,
        -0.0007543442770838737,
        0.011523177847266197,
        -0.0027818898670375347,
        0.0037332584615796804,
        0.01304913591593504,
        -0.005237237084656954,
        -0.00340043636970222,
        0.016691338270902634,
        0.02612338960170746,
        -0.002295216079801321,
        -0.008333110250532627,
        -0.009683237411081791,
        -0.022305356338620186,
        -0.00935041531920433,
        0.06972935795783997,
        -0.001001605880446732,
        -0.0031319810077548027,
        0.019617659971117973,
        -0.019014813005924225,
        -0.012848186306655407,
        -0.04051637277007103,
        -0.018223576247692108,
        -0.01665366068482399,
        -0.0031979172490537167,
        -0.004436580464243889,
        0.004458559211343527,
        -0.013802695088088512,
        0.01994420401751995,
        0.013551508076488972,
        0.009080389514565468,
        0.00340043636970222,
        -0.027052778750658035,
        -0.011341067962348461,
        0.0048667374067008495,
        -0.0151088647544384,
        0.010110254399478436,
        0.00046077012666501105,
        0.009990940801799297,
        0.0205470509827137,
        0.008307991549372673,
        0.00277247023768723,
        0.01720627024769783,
        -0.007460237015038729,
        -0.0038965295534580946,
        0.008483821526169777,
        -0.00610383041203022,
        0.01745745725929737,
        0.008778966031968594,
        0.013312880881130695,
        0.014078999869525433,
        0.0029043431859463453,
        0.03780356049537659,
        0.008464982733130455,
        -0.005384809337556362,
        0.011868558824062347,
        0.005287474486976862,
        -0.006179186515510082,
        -0.006009635515511036,
        0.0142673896625638,
        0.0023344638757407665,
        -0.0057710083201527596,
        0.028660371899604797,
        -0.0014239131705835462,
        -0.028685491532087326,
        0.023121710866689682,
        0.007742822170257568,
        -0.009218541905283928,
        -0.023071475327014923,
        -0.01187483873218298,
        0.018072864040732384,
        0.00027355772908777,
        -0.013928287662565708,
        -0.010844974778592587,
        -0.017796559259295464,
        -0.014744644053280354,
        -0.01513398252427578,
        -0.014694406650960445,
        -0.001071467180736363,
        -0.031172236427664757,
        -0.019190644845366478,
        -0.02369944006204605,
        0.002376851625740528,
        -0.007466516923159361,
        0.001561280689202249,
        -0.0048981355503201485,
        -0.03863247483968735,
        -0.0125404829159379,
        -0.0036516229156404734,
        0.01909017004072666,
        0.0004517431079875678,
        -0.001898027490824461,
        -0.006486889906227589,
        0.004044101573526859,
        0.013802695088088512,
        -0.006047313567250967,
        -0.029012033715844154,
        0.011271991766989231,
        -0.006292220205068588,
        0.015611236914992332,
        0.0031727987807244062,
        0.00666272034868598,
        0.009111788123846054,
        -0.01690484769642353,
        0.029589762911200523,
        0.008521500043570995,
        0.015071186237037182,
        0.026299219578504562,
        0.0003149642434436828,
        0.018650593236088753,
        0.004537055268883705,
        0.0050582666881382465,
        0.001783423707820475,
        -0.019906524568796158,
        -1.6802998288767412e-05,
        -0.008307991549372673,
        -0.01179320365190506,
        -0.0020785678643733263,
        0.004810220096260309,
        -0.0034695127978920937,
        0.01676669530570507,
        0.01690484769642353,
        0.01568659394979477,
        -0.015372609719634056,
        -0.0016327118501067162,
        0.034211594611406326,
        -0.024088779464364052,
        0.016251763328909874,
        -0.0064806099981069565,
        0.0001965337578440085,
        0.013438474386930466,
        0.021564355120062828,
        0.03504050895571709,
        -0.004694046452641487,
        -0.03443766012787819,
        -0.005557499825954437,
        -0.025281915441155434,
        0.008490101434290409,
        0.0157996267080307,
        0.009130626916885376,
        0.0012418029364198446,
        -0.0018619195325300097,
        -0.022192321717739105,
        -0.006907626986503601,
        -0.0032748430967330933,
        -0.005548080429434776,
        0.019542304798960686,
        -0.006304779555648565,
        -0.0023250444792211056,
        -0.015309813432395458,
        -0.006499449256807566,
        -0.018324051052331924,
        0.005968817975372076,
        -0.006851110141724348,
        -0.017859356477856636,
        -0.02425205148756504,
        -0.0063236188143491745,
        -0.006568525452166796,
        -0.013890610076487064,
        -0.04551498219370842,
        -0.044786542654037476,
        -0.016163846477866173,
        0.00394362723454833,
        -0.017444897443056107,
        0.016364796087145805,
        -0.003949906677007675,
        -0.000322813808452338,
        -0.019981881603598595,
        -0.010625186376273632,
        -0.006342457607388496,
        -0.01692996546626091,
        -0.0006311059114523232,
        -0.006116389762610197,
        0.027002541348338127,
        0.027931932359933853,
        0.025369830429553986,
        0.013978525064885616,
        0.0114603815600276,
        0.006474330555647612,
        -0.008559177629649639,
        -0.01662854291498661,
        0.004559034015983343,
        -0.012094627134501934,
        -0.014204593375325203,
        0.012710033915936947,
        0.020999185740947723,
        0.028459424152970314,
        -0.007026940584182739,
        0.009595322422683239,
        0.0048447586596012115,
        -0.008188677951693535,
        -0.020873593166470528,
        0.0010173050686717033,
        -0.013928287662565708,
        -0.014405542053282261,
        -0.020974067971110344,
        -0.0010471334680914879,
        0.002153923735022545,
        0.010958008468151093,
        0.011234313249588013,
        -0.015548440627753735,
        0.011812042444944382,
        0.008596856147050858,
        0.02079823799431324,
        0.003050345228984952,
        0.026299219578504562,
        -0.01969301700592041,
        0.02742955833673477,
        0.007265567779541016,
        0.020597288385033607,
        -0.013162169605493546,
        0.007711423560976982,
        -0.006794593296945095,
        0.016917407512664795,
        0.009262500330805779,
        0.021451322361826897,
        0.02770586498081684,
        -0.012270457111299038,
        -0.010593787766993046,
        0.005884042475372553,
        0.026173627004027367,
        0.0031162817031145096,
        0.007510474417358637,
        0.004194813314825296,
        -0.007862135767936707,
        0.0027096737176179886,
        -0.020195389166474342,
        -0.004998610354959965,
        -0.013501270674169064,
        0.004712885711342096,
        -0.012201380915939808,
        -0.01872594840824604,
        0.018625473603606224,
        -0.011881118640303612,
        -0.029087388888001442,
        -0.015787066891789436,
        -0.0003167304093949497,
        0.048001728951931,
        0.005959398113191128,
        0.012100907042622566,
        0.01939159259200096,
        0.0028556757606565952,
        -0.005940559320151806,
        0.02068520337343216,
        -0.018675711005926132,
        -0.005522961728274822,
        0.039813049137592316,
        0.0015455815009772778,
        -0.0063236188143491745,
        -0.02416413463652134,
        0.004684627056121826,
        0.004078639671206474,
        -0.015071186237037182,
        -0.024051101878285408,
        0.008050525560975075,
        0.028760846704244614,
        -0.002221429953351617,
        -0.014619050547480583,
        0.006113249808549881,
        -0.033131491392850876,
        0.02471674606204033,
        0.003482071915641427,
        -0.021363407373428345,
        -0.015058627352118492,
        0.012440008111298084,
        -0.023071475327014923,
        0.0088919997215271,
        -0.0305191520601511,
        0.02720349095761776,
        0.03413623571395874,
        -0.005836945027112961,
        -0.006668999791145325,
        0.003796054981648922,
        0.00045880774268880486,
        -0.0013477721950039268,
        -0.007686304859817028,
        0.02833382971584797,
        -0.007058338727802038,
        0.0036641822662204504,
        0.01891433820128441,
        -0.0021272350568324327,
        -0.02079823799431324,
        -0.005208978895097971,
        -0.012320694513618946,
        0.022581661120057106,
        -0.017331864684820175,
        0.005482144188135862,
        -0.011899957433342934,
        0.011328508146107197,
        -0.003271703375503421,
        0.012075788341462612,
        -0.013790135271847248,
        -0.00652456795796752,
        -0.006210584659129381,
        -0.0016672499477863312,
        0.02665088139474392,
        2.0819775272684637e-06,
        -0.001816391944885254,
        -0.01485767774283886,
        -0.017984949052333832,
        -0.024503236636519432,
        -0.04712257534265518,
        -0.004483677912503481,
        0.0015801197150722146,
        -0.029589762911200523,
        -0.004659508354961872,
        -0.005466444883495569,
        0.009551363997161388,
        0.02565869502723217,
        0.016779253259301186,
        -0.012358373031020164,
        0.012553042732179165,
        0.023247305303812027,
        -0.0181733388453722,
        -0.0057301907800138,
        0.018688270822167397,
        0.015975456684827805,
        -0.02584708482027054,
        0.016251763328909874,
        -0.0035699873697012663,
        -0.019178085029125214,
        0.004577872809022665,
        -0.014104118570685387,
        -0.004160275217145681,
        0.011585974134504795,
        0.013388236984610558,
        0.00026119465474039316,
        -0.016113610938191414,
        0.006348737049847841,
        0.02936369553208351,
        0.016867170110344887,
        0.009036432020366192,
        -0.007322084624320269,
        -0.003296822076663375,
        0.037050001323223114,
        -0.014217152260243893,
        0.0041539957746863365,
        -0.007334643974900246,
        -0.012697474099695683,
        0.019655339419841766,
        -0.007052059285342693,
        -0.007667466066777706,
        -0.0007700434071011841,
        -0.03576894849538803,
        -0.003962466027587652,
        -0.0019985020626336336,
        0.008138440549373627,
        0.014342745766043663,
        -0.015837304294109344,
        -0.03210162743926048,
        -0.0056108771823346615,
        -0.016703898087143898,
        0.00034538135514594615,
        0.009984660893678665,
        -0.01095172856003046,
        0.001340707647614181,
        0.013840372674167156,
        0.016716457903385162,
        0.00038973146001808345,
        0.005940559320151806,
        -0.011535737663507462,
        -0.01093916967511177,
        -0.016314558684825897,
        -0.02427716925740242,
        -0.012634677812457085,
        -0.0012151143746450543,
        0.041244812309741974,
        0.006957864388823509,
        -0.018110541626811028,
        -0.0088919997215271,
        -0.005127343349158764,
        -0.02747979573905468,
        -0.03227745741605759,
        -0.002741072094067931,
        -0.0029655699618160725,
        -0.0056234365329146385,
        0.016741575673222542,
        -0.008527779951691628,
        0.022016491740942,
        0.006263962015509605,
        -0.0022748070769011974,
        -0.00584008451551199,
        -0.019630219787359238,
        0.011824601329863071,
        0.004703465849161148,
        0.01513398252427578,
        -0.000919970334507525,
        -0.02328498288989067,
        0.002163343131542206,
        0.0026296081487089396,
        0.011912517249584198,
        -0.0042073726654052734,
        0.00936297420412302,
        -0.009331576526165009,
        -0.010487033985555172,
        -0.014355304650962353,
        -0.0040284027345478535,
        -0.002763050841167569,
        0.007253008428961039,
        0.016364796087145805,
        -0.0157996267080307,
        0.001935705542564392,
        0.0050582666881382465,
        -0.003422415116801858,
        0.003422415116801858,
        -0.00468148710206151,
        0.008433585055172443,
        -0.015360050834715366,
        0.015837304294109344,
        -0.019981881603598595,
        -0.006320478860288858,
        0.003088023280724883,
        -0.02336033806204796,
        0.015159101225435734,
        -0.025420067831873894,
        0.00652456795796752,
        0.0294390507042408,
        0.01068798266351223,
        0.00756699126213789,
        -0.0008603135356679559,
        -0.0018933177925646305,
        0.0024160996545106173,
        0.005488423630595207,
        -0.00935041531920433,
        -0.011315949261188507,
        -2.2285437808022834e-05,
        -0.007372322026640177,
        -0.021149897947907448,
        -0.02966511808335781,
        0.0021617733873426914,
        -0.025721492245793343,
        -0.014330185949802399,
        0.001420773332938552,
        0.03476420417428017,
        0.015787066891789436,
        -0.026550406590104103,
        -0.01319984719157219,
        -0.023159390315413475,
        0.013011457398533821,
        0.009406931698322296,
        -0.013388236984610558,
        0.001562850666232407,
        -0.00962672010064125,
        -0.0042324913665652275,
        0.021916016936302185,
        -0.01762072928249836,
        0.005290614441037178,
        0.018537558615207672,
        0.016176406294107437,
        -0.004251330625265837,
        0.039260439574718475,
        0.2337038666009903,
        -0.012791668996214867,
        0.0035228899214416742,
        0.027354203164577484,
        0.02217976190149784,
        0.013551508076488972,
        0.010210729204118252,
        0.0017080678371712565,
        0.006982983089983463,
        0.0028289873152971268,
        -0.01159853395074606,
        0.0012826207093894482,
        -0.00470974575728178,
        0.006756915245205164,
        0.005450745578855276,
        0.00941949151456356,
        -0.02107454277575016,
        -0.019793491810560226,
        -0.030921051278710365,
        -0.024503236636519432,
        0.006279660854488611,
        -0.03295566141605377,
        -0.025256795808672905,
        -0.014305067248642445,
        0.017495134845376015,
        -0.0072969659231603146,
        -0.008446143940091133,
        0.0011868559522554278,
        0.03363386541604996,
        0.009746033698320389,
        0.004763122648000717,
        -0.011573415249586105,
        0.011228034272789955,
        0.011246873065829277,
        -0.014367864467203617,
        -0.019567424431443214,
        0.03604525327682495,
        0.013363118283450603,
        0.03084569424390793,
        0.003871411085128784,
        -0.016553185880184174,
        -0.025068406015634537,
        -0.004518216010183096,
        -0.017633287236094475,
        0.0027238030452281237,
        0.011096160858869553,
        0.007246728986501694,
        -0.026173627004027367,
        0.018123101443052292,
        0.033985525369644165,
        0.004656368400901556,
        0.007014381233602762,
        0.02300867810845375,
        0.044032983481884,
        0.00041406514355912805,
        -0.015950338914990425,
        0.008653372526168823,
        0.016666220501065254,
        -0.005259216297417879,
        -0.000553002639207989,
        0.0005902881384827197,
        0.03185044229030609,
        -2.5854542400338687e-05,
        0.03943626955151558,
        -0.023598965257406235,
        0.007472796365618706,
        -0.02519400045275688,
        -0.002959290286526084,
        0.003968745935708284,
        -0.0035166102461516857,
        -0.01667878031730652,
        -0.0014144936576485634,
        0.00603475421667099,
        -0.00840846635401249,
        -0.029514405876398087,
        -0.009519966319203377,
        0.010499592870473862,
        0.0036798813380301,
        0.03335756063461304,
        0.0274546779692173,
        -0.009388092905282974,
        -0.0015463664894923568,
        -0.009859067387878895,
        -0.02010747417807579,
        -0.038657594472169876,
        -0.029037151485681534,
        0.0022512583527714014,
        -0.00729068648070097,
        -0.012107186019420624,
        -0.023372897878289223,
        -0.014531135559082031,
        -0.010254686698317528,
        0.001805402571335435,
        -0.006069292314350605,
        0.023686882108449936,
        0.026198744773864746,
        -0.0034569534473121166,
        0.010612627491354942,
        -0.02091127075254917,
        0.012364652007818222,
        -0.005510402377694845,
        -0.00246790680103004,
        7.820140308467671e-05,
        0.0090866694226861,
        0.0022292796056717634,
        0.009903025813400745,
        -0.0071148560382425785,
        0.0007563066901639104,
        -0.00300324778072536,
        -0.01563635654747486,
        -0.022280236706137657,
        -0.0016735296230763197,
        -0.006807152647525072,
        -0.01108988095074892,
        -0.00680087273940444,
        0.00976487249135971,
        -0.005080245900899172,
        -0.00265472661703825,
        -0.0003416528052184731,
        -0.008885719813406467,
        -0.003959326073527336,
        -0.013915728777647018,
        -0.014794881455600262,
        0.002414529677480459,
        -0.006169767118990421,
        -0.014807440340518951,
        -0.006888788193464279,
        0.00915574561804533,
        0.014531135559082031,
        -0.030016779899597168,
        0.00854661874473095,
        -0.0002739502233453095,
        0.021237812936306,
        0.008113321848213673,
        -0.006989262532442808,
        0.007334643974900246,
        0.016992762684822083,
        -0.0028666651342064142,
        -0.016879728063941002,
        0.0035448686685413122,
        -0.0021131059620529413,
        -0.010003499686717987,
        0.0088919997215271,
        -0.0073534827679395676,
        -0.014920474030077457,
        -0.01649039052426815,
        0.03112199902534485,
        -0.027555152773857117,
        -0.010970567353069782,
        0.002863525412976742,
        -0.04272681474685669,
        -0.018185898661613464,
        -0.013903168961405754,
        -0.0035385889932513237,
        0.027931932359933853,
        -0.020735440775752068,
        -0.019680457189679146,
        -0.02485489845275879,
        -0.006210584659129381,
        0.009896745905280113,
        -0.012502805329859257,
        0.009645558893680573,
        0.02996654249727726,
        -0.007862135767936707,
        -0.038657594472169876,
        -0.013149609789252281,
        -0.16045789420604706,
        0.014430660754442215,
        0.011523177847266197,
        -0.012760271318256855,
        0.02066008374094963,
        0.015171661041676998,
        0.024641389027237892,
        0.004643809515982866,
        -0.009067830629646778,
        0.018575238063931465,
        0.009859067387878895,
        -0.01288586389273405,
        -0.014945592731237411,
        -0.014305067248642445,
        0.005290614441037178,
        -0.01612616889178753,
        -0.008182398043572903,
        0.012352093122899532,
        0.029740475118160248,
        0.028685491532087326,
        0.035115864127874374,
        -0.007334643974900246,
        -0.0009168304968625307,
        -0.00477882195264101,
        0.013714779168367386,
        -0.007811898365616798,
        0.0007111715967766941,
        0.03393528610467911,
        -0.010053737089037895,
        -0.007020661141723394,
        -0.016000576317310333,
        -0.02888644114136696,
        0.02582196518778801,
        0.004559034015983343,
        0.021966254338622093,
        -0.006430373061448336,
        0.012446288019418716,
        -0.042601220309734344,
        -0.01635223627090454,
        0.024930253624916077,
        0.032855186611413956,
        0.018625473603606224,
        0.019228322431445122,
        -0.013526389375329018,
        -0.015410288237035275,
        -0.005224677734076977,
        0.020609848201274872,
        0.015083746053278446,
        0.01621408388018608,
        -0.0070897373370826244,
        0.008119601756334305,
        -0.003028366481885314,
        -0.0005110073834657669,
        -0.00622942391782999,
        0.019981881603598595,
        0.029489288106560707,
        0.006888788193464279,
        0.02452835626900196,
        0.0024773261975497007,
        -0.008458703756332397,
        -0.0002564848982729018,
        -0.012370931915938854,
        -0.006254542153328657,
        -0.004706605803221464,
        0.006050453521311283,
        -0.005224677734076977,
        0.0014686556532979012,
        0.009689517319202423,
        -0.00024942029267549515,
        0.018349168822169304,
        -0.008910838514566422,
        -0.023084033280611038,
        -0.01039911899715662,
        -0.02554566040635109,
        0.006825991440564394,
        0.008389626629650593,
        -0.03378457576036453,
        0.03109688125550747,
        -0.0114603815600276,
        -0.014832559041678905,
        -0.008295431733131409,
        0.021313169971108437,
        -0.0024836058728396893,
        0.007070898078382015,
        -0.023121710866689682,
        0.00038973146001808345,
        0.0032465846743434668,
        0.012653516605496407,
        -0.03134806826710701,
        -0.027002541348338127,
        0.007453957572579384,
        -0.017984949052333832,
        -0.014166914857923985,
        -0.018022626638412476,
        0.00543504673987627,
        0.015598678030073643,
        0.012534203007817268,
        0.01623920351266861,
        -0.01571171171963215,
        -0.019893966615200043,
        -0.002260677982121706,
        0.017143474891781807,
        -0.016013136133551598,
        0.011190355755388737,
        0.03049403429031372,
        -0.005469584837555885,
        0.017532814294099808,
        0.007981449365615845,
        0.022405831143260002,
        -0.03169972822070122,
        -0.03217698261141777,
        0.006631322205066681,
        0.014694406650960445,
        0.026148507371544838,
        0.01623920351266861,
        0.026449931785464287,
        -0.013262644410133362,
        -0.01734442450106144,
        0.0048321993090212345,
        -0.013099372386932373,
        0.052045829594135284,
        0.006223144009709358,
        -0.04420881345868111,
        -0.0010887362295761704,
        0.0028792244847863913,
        0.0002376459160586819,
        -0.08409722149372101,
        -0.03438742458820343,
        0.004712885711342096,
        0.014405542053282261,
        -0.001269276486709714,
        0.03835617005825043,
        0.004216792527586222,
        -0.0017881334060803056,
        0.0019875126890838146,
        0.005535521078854799,
        0.008923398330807686,
        -0.03049403429031372,
        -0.01787191443145275,
        -0.010568669065833092,
        0.04071732237935066,
        -0.02720349095761776,
        0.012779110111296177,
        0.0010651875054463744,
        -0.033960405737161636,
        0.011604813858866692,
        -0.008678491227328777,
        0.007868414744734764,
        0.0028478263411670923,
        -0.001828951295465231,
        -0.02222999930381775,
        0.012201380915939808,
        -0.022971000522375107,
        0.03127271309494972,
        0.0016358516877517104,
        -0.004926394205540419,
        -0.016754135489463806,
        -0.024063661694526672,
        -0.014166914857923985,
        -0.008307991549372673,
        -0.004449139814823866,
        0.013626864179968834,
        -0.04099362716078758,
        0.03102152608335018,
        0.013413355685770512,
        -0.04106898233294487,
        0.027253728359937668,
        0.010593787766993046,
        -0.006461771205067635,
        -0.05581362545490265,
        -0.012659796513617039,
        0.017168592661619186,
        -0.0019137266790494323,
        0.03777844086289406,
        0.01870083063840866,
        -0.009048991836607456,
        -0.03222722187638283,
        -0.03139830380678177,
        -0.021916016936302185,
        0.010832414962351322,
        0.01048075407743454,
        -0.0017535953084006906,
        0.008056805469095707,
        0.028911558911204338,
        -0.009921864606440067,
        0.011962753720581532,
        0.013375678099691868,
        0.006147787906229496,
        -0.02038377895951271,
        0.01704300008714199,
        -0.012559321708977222,
        0.018575238063931465,
        -0.012697474099695683,
        0.0232221856713295,
        0.0201577115803957,
        0.00969579629600048,
        -0.01563635654747486,
        0.028409186750650406,
        0.006245122756808996,
        0.010053737089037895,
        -0.03792915120720863,
        -0.005469584837555885,
        -0.015837304294109344,
        -0.020258186385035515,
        0.006606203503906727,
        -0.005811826325953007,
        -0.03310637176036835,
        -0.020735440775752068,
        0.009218541905283928,
        -0.008182398043572903,
        0.026022914797067642,
        0.01360174547880888,
        -0.01054983027279377,
        0.0057710083201527596,
        -0.015347491018474102,
        -0.03544240817427635,
        -0.03054427169263363,
        0.01035516057163477,
        0.006989262532442808,
        -0.011510618962347507,
        0.007529313676059246,
        0.014757202938199043,
        -0.01180576253682375,
        -0.0010895212180912495,
        0.01607593148946762,
        0.010807296261191368,
        -0.02485489845275879,
        -0.011824601329863071,
        -0.0650070533156395,
        0.032352812588214874,
        -0.016113610938191414,
        -0.019353915005922318,
        0.0009600031771697104,
        -0.022016491740942,
        0.006687839049845934,
        -0.01512142363935709,
        0.002095836913213134,
        -0.00881036464124918,
        -0.018675711005926132,
        -0.0048635974526405334,
        -0.002117815660312772,
        0.012615839019417763,
        -0.02798216976225376,
        0.0032685634214431047,
        0.020182831212878227,
        0.014192033559083939,
        0.005880902521312237,
        -0.002144504338502884,
        0.00037756460369564593,
        -0.03245328739285469,
        -0.016502948477864265,
        0.001036928966641426,
        -0.023774797096848488,
        0.011177796870470047,
        -0.0314234234392643,
        0.0005577123956754804,
        -0.002411389723420143,
        -0.00874128844588995,
        0.0064366525039076805,
        -0.042576100677251816,
        0.0064021144062280655,
        0.022682135924696922,
        0.0008108612382784486,
        -0.010122813284397125,
        0.006794593296945095,
        0.015083746053278446,
        0.007366042118519545,
        0.020333541557192802,
        -0.030895931646227837,
        -0.033181726932525635,
        0.015611236914992332,
        -0.010832414962351322,
        0.0036013855133205652,
        0.004634389653801918,
        -0.005340851843357086,
        -0.0033941566944122314,
        0.030418677255511284,
        0.009808829985558987,
        -0.003227745648473501,
        0.029263220727443695,
        -0.028409186750650406,
        -0.013036576099693775,
        -0.012841906398534775,
        -0.008351949043571949,
        -0.017143474891781807,
        -0.0003757984668482095,
        -0.008333110250532627,
        -0.04493725299835205,
        0.02248118631541729,
        0.00018220828496851027,
        0.02966511808335781,
        -0.007868414744734764,
        0.005246656946837902,
        0.010813576169312,
        -0.0205470509827137,
        -0.002692404668778181,
        -0.014619050547480583,
        -0.02234303392469883,
        -0.006920186337083578,
        -0.00593741936609149,
        0.00517130084335804,
        0.005312593188136816,
        0.019630219787359238,
        0.020647525787353516,
        -0.003180648200213909,
        0.0170932374894619,
        -0.040616847574710846,
        0.037728201597929,
        0.020258186385035515,
        0.021112220361828804,
        -0.030318202450871468,
        -0.0004332966054789722,
        0.034563254565000534,
        0.006088131107389927,
        -0.006254542153328657,
        -0.01731930486857891,
        -0.009205983020365238,
        0.00310058263130486,
        -0.03607037290930748,
        -0.0266759991645813,
        0.006939025595784187,
        0.024289729073643684,
        -0.0038368727546185255,
        0.00473800441250205,
        0.017520254477858543,
        0.00783073715865612,
        0.017658406868577003,
        0.023372897878289223,
        0.0023721419274806976,
        0.009438330307602882,
        -0.016000576317310333,
        -0.004552754107862711,
        -0.008785245940089226,
        0.005924860015511513,
        -0.006289080251008272,
        -0.037878915667533875,
        -0.007328364532440901,
        0.012898423708975315,
        0.011812042444944382,
        0.029790712520480156,
        -0.006361296400427818,
        0.015046067535877228,
        -0.02499305084347725,
        -0.00584008451551199,
        0.02609827183187008,
        -0.0014945593429729342,
        -0.028434304520487785,
        0.04242539033293724,
        -0.008973635733127594,
        0.0035794067662209272,
        0.02770586498081684,
        -0.015071186237037182,
        0.008433585055172443,
        0.002970279660075903,
        -0.007793059106916189,
        -0.0035228899214416742,
        0.01905249059200287,
        -0.0050017498433589935,
        0.011717847548425198,
        -0.010041178204119205,
        -0.016452711075544357,
        -0.015410288237035275,
        0.0009136906592175364,
        0.01372733898460865,
        -0.008351949043571949,
        0.024339966475963593,
        -0.0161889661103487,
        0.05631599947810173,
        -0.012722592800855637,
        -0.0041634151712059975,
        0.02287052571773529,
        0.014292508363723755,
        0.020735440775752068,
        0.024427881464362144,
        0.018072864040732384,
        -0.008559177629649639,
        -0.02024562656879425,
        -0.0038494321051985025,
        0.006756915245205164,
        -0.0008289152756333351,
        -0.02259422093629837,
        -0.011127559468150139,
        0.004289008677005768,
        -0.01591266132891178,
        0.004398902412503958,
        -0.019630219787359238,
        0.01292354241013527,
        0.018324051052331924,
        0.008942237123847008,
        0.0243902038782835,
        0.013212407007813454,
        -0.018072864040732384,
        -0.019931644201278687,
        0.00423877127468586,
        0.0012331684119999409,
        -0.018349168822169304,
        -0.016151288524270058,
        0.017243949696421623,
        0.0007649412145838141,
        -0.0012857605470344424,
        -0.01260955911129713,
        0.006656440440565348,
        0.002959290286526084,
        -0.007108576130121946,
        -0.009513686411082745,
        -0.0036422032862901688,
        0.010562390089035034,
        0.02289564348757267,
        0.0006240413058549166,
        -0.018097983673214912,
        -0.03295566141605377,
        0.006813432089984417,
        0.002750491490587592,
        -0.02936369553208351,
        -0.005384809337556362,
        -0.02317194826900959
      ]
    }
  ],
  "model": "ada",
  "usage": {
    "prompt_tokens": 3,
    "total_tokens": 3
  }
}


================================================
FILE: libs/ktem/ktem_tests/test_qa.py
================================================
import json
from pathlib import Path
from unittest.mock import patch

import pytest
from index import ReaderIndexingPipeline
from openai.resources.embeddings import Embeddings
from openai.types.chat.chat_completion import ChatCompletion

from kotaemon.llms import AzureChatOpenAI

# Canned embedding payload, loaded once at import time from the JSON fixture
# stored in the `resources` directory next to this test module. It is what the
# `mock_openai_embedding` fixture returns in place of `Embeddings.create`.
with open(Path(__file__).parent / "resources" / "embedding_openai.json") as f:
    openai_embedding = json.load(f)


# Canned chat completion object. `test_ingest_pipeline` patches
# `Completions.create` so that every call returns this same response.
_openai_chat_completion_response = ChatCompletion.parse_obj(
    {
        "id": "chatcmpl-7qyuw6Q1CFCpcKsMdFkmUPUa7JP2x",
        "object": "chat.completion",
        "created": 1692338378,
        "model": "gpt-35-turbo",
        "system_fingerprint": None,
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {
                    "role": "assistant",
                    "content": "Hello! How can I assist you today?",
                    "function_call": None,
                    "tool_calls": None,
                },
            }
        ],
        "usage": {"completion_tokens": 9, "prompt_tokens": 10, "total_tokens": 19},
    }
)


@pytest.fixture(scope="function")
def mock_openai_embedding(monkeypatch):
    """Swap `Embeddings.create` for a stub that returns the canned payload."""

    def _canned_create(*args, **kwargs):
        # Arguments are accepted and ignored; the fixture data is always returned.
        return openai_embedding

    monkeypatch.setattr(Embeddings, "create", _canned_create)


@patch(
    "openai.resources.chat.completions.Completions.create",
    side_effect=lambda *args, **kwargs: _openai_chat_completion_response,
)
def test_ingest_pipeline(mock_chat_create, mock_openai_embedding, tmp_path):
    """Index a dummy PDF, retrieve from it, then answer a question about it.

    Args:
        mock_chat_create: mock injected positionally by the `@patch` decorator
            for `Completions.create`. It is named so that it no longer shadows
            the imported `unittest.mock.patch`.
        mock_openai_embedding: fixture that stubs out `Embeddings.create`.
        tmp_path: pytest temporary directory used as the index storage path.
    """
    indexing_pipeline = ReaderIndexingPipeline(
        storage_path=tmp_path,
    )
    indexing_pipeline.indexing_vector_pipeline.embedding.openai_api_key = "some-key"
    input_file_path = Path(__file__).parent / "resources/dummy.pdf"

    # call ingestion pipeline
    indexing_pipeline(input_file_path, force_reindex=True)
    retrieving_pipeline = indexing_pipeline.to_retrieving_pipeline()

    results = retrieving_pipeline("This is a query")
    assert len(results) == 1

    # create llm; its chat completions are served by the patched `create`
    llm = AzureChatOpenAI(
        api_key="dummy",
        api_version="2024-05-01-preview",
        azure_deployment="gpt-4o",
        azure_endpoint="https://test.openai.azure.com/",
    )
    qa_pipeline = indexing_pipeline.to_qa_pipeline(llm=llm, openai_api_key="some-key")
    response = qa_pipeline("Summarize this document.")
    assert response


================================================
FILE: libs/ktem/migrations/README
================================================
Generic single-database configuration.

To enable database migrations, set `KH_ENABLE_ALEMBIC` to `True` in the
settings file.


================================================
FILE: libs/ktem/migrations/env.py
================================================
from logging.config import fileConfig

from alembic import context
from ktem.db.models import *  # noqa
from sqlalchemy import engine_from_config, pool
from sqlmodel import SQLModel
from theflow.settings import settings

# The Alembic Config object for the current run; it exposes the values of the
# .ini file in use.
config = context.config

# Configure Python logging from the .ini file, when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata that Alembic compares against the database for 'autogenerate'.
# NOTE(review): presumably the wildcard import of `ktem.db.models` at the top
# of this file is what registers the tables on `SQLModel.metadata` - confirm.
target_metadata = SQLModel.metadata

# Other values from the config, defined by the needs of env.py, can be
# acquired with e.g.:
# my_important_option = config.get_main_option("my_important_option")


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the `KH_DATABASE` URL alone, without
    creating an Engine, so no DBAPI needs to be available. Calls to
    context.execute() emit the given string to the script output.
    """
    offline_options = {
        "url": settings.KH_DATABASE,
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the .ini section, with its URL overridden by
    `settings.KH_DATABASE`, and a live connection is bound to the context
    before the migrations run.
    """
    ini_options = config.get_section(config.config_ini_section, {})
    # The application setting wins over whatever URL the .ini file declares.
    ini_options["sqlalchemy.url"] = settings.KH_DATABASE
    engine = engine_from_config(
        ini_options, prefix="sqlalchemy.", poolclass=pool.NullPool
    )

    with engine.connect() as db_connection:
        context.configure(connection=db_connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()


# Entry point: executed when this module is run; picks the migration mode
# according to what the Alembic context reports.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()


================================================
FILE: libs/ktem/migrations/script.py.mako
================================================
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}


================================================
FILE: libs/ktem/migrations/versions/.keep
================================================


================================================
FILE: libs/ktem/pyproject.toml
================================================
[build-system]
requires = ["setuptools >= 61.0", "wheel", "setuptools-git-versioning>=2.0,<3"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
include-package-data = true
packages.find.exclude = ["ktem_tests*", "env*"]
packages.find.include = ["ktem*"]

[tool.setuptools-git-versioning]
enabled = true
dev_template = "{tag}"
dirty_template = "{tag}"
tag_filter = "v?\\d+(\\.\\d+)*.*"

[project]
name = "ktem"
dynamic = ["version"]
requires-python = ">= 3.10"
description = "RAG-based Question and Answering Application"
dependencies = [
    "click>=8.1.7,<9",
    "platformdirs>=4.2.1,<5",
    "pluggy>=1.5.0,<2",
    "python-decouple>=3.8,<4",
    "SQLAlchemy>=2.0.29,<3",
    "sqlmodel>=0.0.16,<0.1",
    "tiktoken>=0.6.0,<1",
    "gradio>=4.31.0,<5",
    "gradiologin",
    "python-multipart==0.0.12", # required for gradio, pinning to avoid yanking issues with micropip (fixed in gradio >= 5.4.0)
    "markdown>=3.6,<4",
    "tzlocal>=5.0",
    "mcp>=1.0.0",
]
authors = [
    { name = "@trducng", email = "john@cinnamon.is" },
    { name = "@lone17", email = "ian@cinnamon.is" },
    { name = "@taprosoft", email = "tadashi@cinnamon.is" },
    { name = "@cin-albert", email = "albert@cinnamon.is" },
]
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]


================================================
FILE: libs/ktem/requirements.txt
================================================
platformdirs
tzlocal


================================================
FILE: mkdocs.yml
================================================
repo_name: Cinnamon/kotaemon
repo_url: https://github.com/Cinnamon/kotaemon
site_name: kotaemon Docs
edit_uri: edit/main/docs/

nav:
  - Getting Started:
      - Quick Start: index.md
      - Basic Usage: usage.md
  # - Application:
  #     - Customize UI: pages/app/customize-ui.md
  # - Functional description: pages/app/functional-description.md
  - Development:
      - development/index.md
      # - Data & Data Structure Components: development/data-components.md
      # - Features: pages/app/features.md
      - Customize flow logic: pages/app/customize-flows.md
      - Creating a Component: development/create-a-component.md
      - Components:
          - Index:
              - File index: pages/app/index/file.md
          - Settings:
              - pages/app/settings/overview.md
              - pages/app/settings/user-settings.md
          - Extension:
              - User management: pages/app/ext/user-management.md
      - Contributing: development/contributing.md
  # generated using gen-files + literate-nav
  - API Reference: reference/
  - Changelogs: https://github.com/Cinnamon/kotaemon/releases
  - Issue Tracker: https://github.com/Cinnamon/kotaemon/issues
  - Live Demo: https://huggingface.co/spaces/cin-model/kotaemon-demo

markdown_extensions:
  - admonition
  - md_in_html
  - pymdownx.highlight:
      use_pygments: true
      anchor_linenums: true
      line_spans: __span
      linenums: true
      pygments_lang_class: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.details
  - pymdownx.extra
  - pymdownx.tabbed:
      alternate_style: true
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_code_format
  - toc:
      permalink: true
      title: Page contents
  - mdx_truly_sane_lists

plugins:
  - search
  - gen-files:
      scripts:
        - docs/scripts/generate_reference_docs.py
  - literate-nav:
      nav_file: Summary.md
  - mkdocstrings:
      handlers:
        python:
          options:
            docstring_options:
              ignore_init_summary: false
            filters:
              - "!^_"
            members_order: source
            separate_signature: true
          paths: [libs/kotaemon/kotaemon]
  - git-revision-date-localized:
      enable_creation_date: true
      type: timeago
      fallback_to_build_date: true
  - section-index
  - mkdocs-video
  - include-markdown

theme:
  features:
    - content.action.edit
    - content.tabs.link
    - content.code.annotate
    - content.code.annotations
    - content.code.copy
    - navigation.tabs
    - navigation.top
    - navigation.instant
    - navigation.indexes
    - toc.follow
    - search.share
    - search.highlight
    - search.suggest
  name: material
  custom_dir: docs/theme
  palette:
    scheme: dracula
    primary: deep purple
    accent: deep purple
  icon:
    repo: fontawesome/brands/github
    edit: material/pencil
    view: material/eye

extra_css:
  - extra/css/code_select.css
  - assets/pymdownx-extras/extra-fb5a2a1c86.css

extra_javascript:
  - assets/pymdownx-extras/extra-loader-MCFnu0Wd.js

validation:
  absolute_links: warn
  omitted_files: warn
  unrecognized_links: warn


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools >= 61.0", "wheel", "setuptools-git-versioning>=2.0,<3"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
include-package-data = false
packages.find.include = []

[tool.setuptools-git-versioning]
enabled = true
dev_template = "{tag}"
dirty_template = "{tag}"
tag_filter = "v?\\d+(\\.\\d+)*.*"

[project]
name = "kotaemon-app"
dynamic = ["version"]
requires-python = ">= 3.10"
description = "Kotaemon App"
dependencies = [
    "kotaemon[all]",
    "ktem"
]
authors = [
    { name = "@trducng", email = "john@cinnamon.is" },
    { name = "@lone17", email = "ian@cinnamon.is" },
    { name = "@taprosoft", email = "tadashi@cinnamon.is" },
    { name = "@cin-albert", email = "albert@cinnamon.is" },
]
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]

[tool.uv.sources]
kotaemon = { workspace = true }
ktem = { workspace = true }

# uv workspace configuration
[tool.uv.workspace]
members = ["libs/kotaemon", "libs/ktem"]

[dependency-groups]
dev = [
    "black",
    "coverage",
    "flake8",
    "ipython",
    "pre-commit",
    "pytest",
    "pytest-mock",
    "sphinx"
]

[project.urls]
Homepage = "https://cinnamon.github.io/kotaemon/"
Repository = "https://github.com/Cinnamon/kotaemon/"
Documentation = "https://cinnamon.github.io/kotaemon/"

[tool.codespell]
skip = "*.js,*.css,*.map"
# `llm` abbreviation for large language models
ignore-words-list = "llm,fo"
quiet-level = 3
check-filenames = ""

[tool.isort]
known_first_party = ["kotaemon"]


================================================
FILE: scripts/download_pdfjs.sh
================================================
#!/bin/bash

# Abort on the first failing command (-e) and make a pipeline fail when any
# of its stages fails (-o pipefail).
set -eo pipefail

# The single required argument is the directory PDF.js is unpacked into
# (it is passed as the destination of download_and_unzip below).
if [ -z "$1" ]; then
    echo "Usage: $0 <pdfjs_version_dist>"
    exit 1
fi

pdfjs_version_dist=$1

function check_path_for_spaces() {
    # Abort the script when the current working directory contains a space.
    case "$PWD" in
    *" "*)
        echo "The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later."
        exit 1
        ;;
    esac
}

function download_and_unzip() {
    # Download the ZIP archive at $1 and extract it into directory $2.
    #
    # An existing destination directory is treated as "already installed" and
    # the download is skipped. Because of that, a failed download or extraction
    # removes the freshly created directory again; otherwise every later run
    # would skip the download and keep the broken state.
    # Returns 0 on success or skip, 1 on failure.
    local url=$1
    local dest_dir=$2

    if [ -d "$dest_dir" ]; then
        echo "Destination directory $dest_dir already exists. Skipping download."
        return
    fi

    mkdir -p "$dest_dir"

    local zip_file="${dest_dir}/downloaded.zip"
    echo "Downloading $url to $zip_file"
    # -f makes curl fail on HTTP errors instead of saving the error page.
    if ! curl -fL -o "$zip_file" "$url"; then
        echo "Failed to download $url" >&2
        rm -rf "$dest_dir"
        return 1
    fi

    echo "Unzipping $zip_file to $dest_dir"
    if ! unzip -o "$zip_file" -d "$dest_dir"; then
        echo "Failed to unzip $zip_file" >&2
        rm -rf "$dest_dir"
        return 1
    fi

    rm "$zip_file"
    echo "Download and unzip completed successfully."
}

# Main script execution

# The PDF.js release is pinned here; the archive name and the GitHub release
# URL are both derived from the version string.
pdf_js_version="4.0.379"
pdf_js_dist_name="pdfjs-${pdf_js_version}-dist"
pdf_js_dist_url="https://github.com/mozilla/pdf.js/releases/download/v${pdf_js_version}/${pdf_js_dist_name}.zip"

check_path_for_spaces

echo "Downloading and unzipping PDF.js"
# The destination is the directory given as the script's first argument.
download_and_unzip "$pdf_js_dist_url" "$pdfjs_version_dist"

echo "PDF.js has been set up in $pdfjs_version_dist"


================================================
FILE: scripts/migrate/__init__.py
================================================


================================================
FILE: scripts/migrate/migrate_chroma_db.py
================================================
import uuid
from datetime import datetime

import chromadb
from ktem.index.models import Index
from sqlalchemy import (
    JSON,
    Column,
    DateTime,
    Integer,
    String,
    UniqueConstraint,
    create_engine,
    select,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.orm import Session
from tzlocal import get_localzone


def _init_resource(private: bool = True, id: int = 1):
    """Build ORM classes for the source and index tables of one file index.

    The schemas are hard-coded here. Both variants of the source table share
    the same columns; they only differ in how uniqueness of `name` is enforced.

    Args:
        private: when True, `name` is unique per user (composite unique
            constraint on `name` + `user`); otherwise `name` alone is unique.
        id: integer id of the index, used to build the table names
            `index__{id}__source` and `index__{id}__index`.

    Returns:
        A dict with the mapped classes under the keys "Source" and "Index".
    """
    Base = declarative_base()

    source_attrs = {
        "__tablename__": f"index__{id}__source",
        "id": Column(
            String,
            primary_key=True,
            default=lambda: str(uuid.uuid4()),
            unique=True,
        ),
        "name": Column(String) if private else Column(String, unique=True),
        "path": Column(String),
        "size": Column(Integer, default=0),
        # Callable default: evaluated per insert rather than once when the
        # class is built.
        "date_created": Column(
            DateTime(timezone=True),
            default=lambda: datetime.now(get_localzone()),
        ),
        "user": Column(Integer, default=1),
        # Callable default so rows never share one mutable dict object.
        "note": Column(
            MutableDict.as_mutable(JSON),  # type: ignore
            default=lambda: {},
        ),
    }
    if private:
        source_attrs["__table_args__"] = (
            UniqueConstraint("name", "user", name="_name_user_uc"),
        )
    Source = type("Source", (Base,), source_attrs)

    Index = type(
        "IndexTable",
        (Base,),
        {
            "__tablename__": f"index__{id}__index",
            "id": Column(Integer, primary_key=True, autoincrement=True),
            "source_id": Column(String),
            "target_id": Column(String),
            "relation_type": Column(String),
            "user": Column(Integer, default=1),
        },
    )

    return {"Source": Source, "Index": Index}


def get_chromadb_collection(
    db_dir: str = "../ktem_app_data/user_data/vectorstore",
    collection_name: str = "index_1",
):
    """Open the persistent chromadb store at `db_dir` and return a collection.

    The collection named `collection_name` is fetched, or created when it does
    not exist yet.
    """
    persistent_client = chromadb.PersistentClient(path=db_dir)
    return persistent_client.get_or_create_collection(collection_name)


def update_metadata(metadata, file_id):
    """Set `file_id` on a metadata mapping and return the mapping.

    Args:
        metadata: metadata dict of one record; it is modified in place.
            `None` (a record stored without metadata) is treated as an empty
            dict.
        file_id: value stored under the "file_id" key.

    Returns:
        The same dict that was passed in, or a new dict when `metadata` was
        `None`.
    """
    if metadata is None:
        metadata = {}
    metadata["file_id"] = file_id
    return metadata


def migrate_chroma_db(
    chroma_db_dir: str, sqlite_path: str, is_private: bool = True, int_index: int = 1
):
    """Update chromadb with metadata.file_id

    For every source document of the index, look up the ids of its vectors in
    the SQL index table and write the document id into the `file_id` metadata
    field of those vectors in the chromadb collection `index_{int_index}`.

    Args:
        chroma_db_dir: directory of the persistent chromadb store.
        sqlite_path: SQLAlchemy database URL of the SQL store.
        is_private: selects the source-table schema variant of the index.
        int_index: integer id of the index to migrate.
    """
    chroma_collection_name = f"index_{int_index}"

    engine = create_engine(sqlite_path)
    resource = _init_resource(private=is_private, id=int_index)
    print("Load sqlalchemy engine successfully!")

    chroma_db_collection = get_chromadb_collection(
        db_dir=chroma_db_dir, collection_name=chroma_collection_name
    )
    print(
        f"Load chromadb collection: {chroma_collection_name}, "
        f"path: {chroma_db_dir} successfully!"
    )

    # Load docs id of user
    with Session(engine) as session:
        stmt = select(resource["Source"])
        results = session.execute(stmt)
        doc_ids = [r[0].id for r in results.all()]
    print(f"Retrieve n-docs: {len(doc_ids)}")
    print(doc_ids)

    for doc_id in doc_ids:
        print("-")
        # Find corresponding vector ids
        with Session(engine) as session:
            stmt = select(resource["Index"]).where(
                resource["Index"].relation_type == "vector",
                resource["Index"].source_id.in_([doc_id]),
            )
            results = session.execute(stmt)
            vs_ids = [r[0].target_id for r in results.all()]

        print(f"Got {len(vs_ids)} vs_ids for doc {doc_id}")

        # Update file_id
        if len(vs_ids) > 0:
            batch = chroma_db_collection.get(ids=vs_ids, include=["metadatas"])
            # `batch` is a plain dict-like result, so the write must go through
            # the collection; `batch.update(...)` would only change the local
            # dict and persist nothing.
            if batch["ids"]:
                chroma_db_collection.update(
                    ids=batch["ids"],
                    metadatas=[
                        # Records stored without metadata come back as None.
                        update_metadata(metadata or {}, doc_id)
                        for metadata in batch["metadatas"]
                    ],
                )

        # Assert file_id. Skip
        print(f"doc-{doc_id} got updated")


def main(chroma_db_dir: str, sqlite_path: str):
    """Run the chromadb migration for every index stored in the SQL database.

    Args:
        chroma_db_dir: directory of the persistent chromadb store.
        sqlite_path: SQLAlchemy database URL holding the `Index` records.
    """
    engine = create_engine(sqlite_path)

    with Session(engine) as session:
        rows = session.execute(select(Index)).all()
        file_indices = [row[0] for row in rows]

        for file_index in file_indices:
            index_id = file_index.id
            private_flag = file_index.config["private"]

            print(f"Migrating for Index id: {index_id}, is_private: {private_flag}")

            migrate_chroma_db(
                chroma_db_dir=chroma_db_dir,
                sqlite_path=sqlite_path,
                is_private=private_flag,
                int_index=index_id,
            )


if __name__ == "__main__":
    # Hard-coded locations of the chromadb store and the SQL database to
    # migrate; edit them before running the script.
    chroma_db_dir: str = "./vectorstore/kan_db"
    sqlite_path: str = "sqlite:///../ktem_app_data/user_data/sql.db"

    main(chroma_db_dir, sqlite_path)


================================================
FILE: scripts/run_linux.sh
================================================
#!/bin/bash

# functions for better code organization
function check_path_for_spaces() {
    # Abort the script when the current working directory contains a space.
    case "$PWD" in
    *" "*)
        echo "The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later."
        exit 1
        ;;
    esac
}

function install_miniconda() {
    # Install Miniconda into $conda_root unless a working conda is already
    # there, then verify that it runs. Reads the globals $install_dir and
    # $conda_root; exits the script on an unsupported architecture or when
    # conda cannot be executed afterwards.

    # Miniconda installer is limited to two main architectures: x86_64 and arm64
    local sys_arch
    sys_arch=$(uname -m)
    case "${sys_arch}" in
    x86_64*) sys_arch="x86_64" ;;
    arm64*) sys_arch="aarch64" ;;
    aarch64*) sys_arch="aarch64" ;;
    *) {
        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
        exit 1
    } ;;
    esac

    # if miniconda has not been installed, download and install it
    if ! "${conda_root}/bin/conda" --version &>/dev/null; then
        # NOTE(review): -d tests for a directory, so for the installer *file*
        # this is always true and the download always runs. Kept as is: a
        # leftover installer can only come from an interrupted run and may be
        # incomplete, so re-downloading is the safe choice.
        if [ ! -d "$install_dir/miniconda_installer.sh" ]; then
            # The URL must be assigned before it is echoed, otherwise the
            # message shows an empty URL.
            local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${sys_arch}.sh"
            echo "Downloading Miniconda from $miniconda_url"

            mkdir -p "$install_dir"
            # NOTE(review): -k disables TLS certificate verification for an
            # installer that is executed right after; consider dropping it.
            curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"
        fi

        echo "Installing Miniconda to $conda_root"
        chmod u+x "$install_dir/miniconda_installer.sh"
        bash "$install_dir/miniconda_installer.sh" -b -p "$conda_root"
        rm -rf "$install_dir/miniconda_installer.sh"
    fi
    echo "Miniconda is installed at $conda_root"

    # test conda
    echo "Conda version: "
    "$conda_root/bin/conda" --version || {
        echo "Conda not found. Will exit now..."
        exit 1
    }
}

function create_conda_env() {
    # Create the conda environment at $env_dir with the Python version given
    # as $1, unless the directory already exists. On failure the partially
    # created environment is removed and the script exits with status 1.
    # Reads the globals $conda_root and $env_dir.
    local python_version="${1}"

    if [ ! -d "${env_dir}" ]; then
        echo "Creating conda environment with python=$python_version in $env_dir"
        "${conda_root}/bin/conda" create -y -k --prefix "$env_dir" python="$python_version" || {
            echo "Failed to create conda environment."
            echo "Will delete the ${env_dir} (if exist) and exit now..."
            # Quoted so the path is always passed to rm as a single argument.
            rm -rf "$env_dir"
            exit 1
        }
    else
        echo "Conda environment exists at $env_dir"
    fi
}

function activate_conda_env() {
    # Activate the conda environment at $env_dir for the rest of the script.
    # Reads the globals $env_dir and $conda_root; exits with status 1 when the
    # environment has no Python binary or cannot be activated.

    # deactivate the current env(s) to avoid conflicts
    # (up to three nested environments; any error output is discarded)
    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null

    # check if conda env is broken (because of interruption during creation)
    if [ ! -f "$env_dir/bin/python" ]; then
        echo "Conda environment appears to be broken. You may need to remove $env_dir and run the installer again."
        exit 1
    fi

    source "$conda_root/etc/profile.d/conda.sh" # conda init
    conda activate "$env_dir" || {
        echo "Failed to activate environment. Please remove $env_dir and run the installer again."
        exit 1
    }
    echo "Activate conda environment at $CONDA_PREFIX"
}

function deactivate_conda_env() {
    # Leave the installer's conda environment, but only when it is the one
    # that is currently active.
    if [ "$CONDA_PREFIX" != "$env_dir" ]; then
        return 0
    fi

    conda deactivate
    echo "Deactivate conda environment at $env_dir"
}

function install_dependencies() {
    # Install kotaemon and ktem into the active environment unless a package
    # whose name contains "kotaemon" is already listed by pip. After a fresh
    # install the caches are cleared and the user is asked whether to launch
    # the web UI; any answer other than yes/y/true exits the script.
    if pip list 2>/dev/null | grep -q "kotaemon"; then
        echo "Requirements are already installed"
    else
        local kotaemon_root="$(pwd)/libs/kotaemon"
        local ktem_root="$(pwd)/libs/ktem/"

        # The version to install comes from a VERSION file in the current
        # directory when present, otherwise "latest" is used.
        if [ -f "$(pwd)/VERSION" ]; then
            local app_version=$(<"$(pwd)/VERSION")
        else
            local app_version="latest"
        fi

        # A pyproject.toml in the current directory means a source checkout:
        # install the local libs in editable mode. Otherwise install from the
        # GitHub repository at the selected version.
        if [ -f "pyproject.toml" ]; then
            echo "Found pyproject.toml. Installing from source"
            echo "" && echo "Installing libs/kotaemon"
            python -m pip install -e "$kotaemon_root"
            echo "" && echo "Installing libs/ktem"
            python -m pip install -e "$ktem_root"

            python -m pip install --no-deps -e .
        else
            echo "Installing Kotaemon $app_version"
            # Work around for versioning control
            python -m pip install "git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/kotaemon"
            python -m pip install "git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/ktem"
            python -m pip install --no-deps "git+https://github.com/Cinnamon/kotaemon.git@$app_version"
        fi

        # Success is judged by the package showing up in `pip list`, not by
        # the exit status of the install commands above.
        if ! pip list 2>/dev/null | grep -q "kotaemon"; then
            echo "Installation failed. You may need to run the installer again."
            deactivate_conda_env
            exit 1
        else
            print_highlight "Install finished successfully. Clear cache..."
            conda clean --all -y
            python -m pip cache purge

            print_highlight "Do you want to launch the web UI? [Y/N]"
            read -p "Input> " launch
            # Lower-case the answer so the comparison is case-insensitive.
            local launch=${launch,,}
            if [[ "$launch" != "yes" && "$launch" != "y" && "$launch" != "true" ]]; then
                echo "Will exit now..."
                deactivate_conda_env
                echo "Please run the installer again to launch the UI."
                exit 0
            fi
        fi
    fi
}

function setup_local_model() {
    # Run scripts/serve_local.py from the current directory with the active
    # Python. The path is quoted so it is always passed as a single argument.
    python "$(pwd)/scripts/serve_local.py"
}

function download_and_unzip() {
    # Download the ZIP archive at $1 and extract it into directory $2.
    #
    # An existing destination directory is treated as "already installed" and
    # the download is skipped. Because of that, a failed download or extraction
    # removes the freshly created directory again; otherwise every later run
    # would skip the download and keep the broken state.
    # Returns 0 on success or skip, 1 on failure.
    local url=$1
    local dest_dir=$2

    # Skip everything when the destination directory is already there
    if [ -d "$dest_dir" ]; then
        echo "Destination directory $dest_dir already exists. Skipping download."
        return
    fi

    mkdir -p "$dest_dir"

    # Download the ZIP file
    local zip_file="${dest_dir}/downloaded.zip"
    echo "Downloading $url to $zip_file"
    # -f makes curl fail on HTTP errors instead of saving the error page.
    if ! curl -fL -o "$zip_file" "$url"; then
        echo "Failed to download $url" >&2
        rm -rf "$dest_dir"
        return 1
    fi

    # Unzip the file to the destination directory
    echo "Unzipping $zip_file to $dest_dir"
    if ! unzip -o "$zip_file" -d "$dest_dir"; then
        echo "Failed to unzip $zip_file" >&2
        rm -rf "$dest_dir"
        return 1
    fi

    # Clean up the downloaded ZIP file
    rm "$zip_file"
    echo "Download and unzip completed successfully."
}

function launch_ui() {
    # Start app.py from the current directory, passing the PDF.js directory
    # given as $1 through the PDFJS_PREBUILT_DIR environment variable.
    # Exits the script with status 1 when the app exits with an error.
    local pdfjs_prebuilt_dir=$1
    # The script path is quoted so it is always passed as a single argument.
    PDFJS_PREBUILT_DIR="$pdfjs_prebuilt_dir" python "$(pwd)/app.py" || {
        echo "" && echo "Will exit now..."
        exit 1
    }
}

function print_highlight() {
    # Print the message given as $1 framed by two banner lines of asterisks.
    local message="${1}"
    echo "" && echo "******************************************************"
    # Quoted so the message is printed verbatim; unquoted, characters such as
    # '?' or '[Y/N]' are glob patterns that can expand to matching file names.
    echo "$message"
    echo "******************************************************" && echo ""
}

# Main script execution

# move to the parent of the directory where this script resides; stop if that
# fails, because every path below is derived from the working directory
cd "$(dirname "${BASH_SOURCE[0]}")" && cd .. || {
    echo "Cannot change to the application directory. Will exit now..."
    exit 1
}

# locations of the bundled Miniconda and of the app's conda environment
install_dir="$(pwd)/install_dir"
conda_root="${install_dir}/conda"
env_dir="${install_dir}/env"
python_version="3.10"

# PDF.js release to download and where to unpack it
pdf_js_version="4.0.379"
pdf_js_dist_name="pdfjs-${pdf_js_version}-dist"
pdf_js_dist_url="https://github.com/mozilla/pdf.js/releases/download/v${pdf_js_version}/${pdf_js_dist_name}.zip"
target_pdf_js_dir="$(pwd)/libs/ktem/ktem/assets/prebuilt/${pdf_js_dist_name}"

check_path_for_spaces

print_highlight "Setting up Miniconda"
install_miniconda

print_highlight "Creating conda environment"
create_conda_env "$python_version"
activate_conda_env

print_highlight "Installing requirements"
install_dependencies

print_highlight "Downloading and unzipping PDF.js"
download_and_unzip "$pdf_js_dist_url" "$target_pdf_js_dir"

print_highlight "Setting up a local model"
setup_local_model

print_highlight "Launching Kotaemon in your browser, please wait..."
launch_ui "$target_pdf_js_dir"

deactivate_conda_env

read -r -p "Press enter to continue"


================================================
FILE: scripts/run_macos.sh
================================================
#!/bin/bash

# functions for better code organization
function check_path_for_spaces() {
    # Abort the script with status 1 when the working directory path contains
    # a space character.
    case "$PWD" in
    *" "*)
        echo "The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later."
        exit 1
        ;;
    esac
}

function install_miniconda() {
    # Install Miniconda into $conda_root unless a working conda is already
    # there. Reads the globals $install_dir and $conda_root.
    # Exits the script with status 1 on an unsupported architecture, when the
    # installer cannot be downloaded, or when conda still cannot be run after
    # the installation.

    # Miniconda installer is limited to two main architectures: x86_64 and arm64
    local sys_arch=$(uname -m)
    case "${sys_arch}" in
    x86_64*) sys_arch="x86_64" ;;
    arm64*) sys_arch="arm64" ;;
    *) {
        echo "Unknown system architecture: ${sys_arch}! This script runs only on x86_64 or arm64"
        exit 1
    } ;;
    esac

    # if miniconda has not been installed, download and install it
    if ! "${conda_root}/bin/conda" --version &>/dev/null; then
        # The installer is a regular file, hence -f (a -d test never matches it)
        if [ ! -f "$install_dir/miniconda_installer.sh" ]; then
            local miniconda_url="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-${sys_arch}.sh"
            echo "Downloading Miniconda from $miniconda_url"

            mkdir -p "$install_dir"
            # NOTE(review): -k disables TLS certificate verification; confirm
            # this is intended before relying on the downloaded installer.
            if ! curl -Lk "$miniconda_url" >"$install_dir/miniconda_installer.sh"; then
                echo "Failed to download Miniconda. Will exit now..."
                # do not leave a partial installer behind for the next run
                rm -f "$install_dir/miniconda_installer.sh"
                exit 1
            fi
        fi

        echo "Installing Miniconda to $conda_root"
        chmod u+x "$install_dir/miniconda_installer.sh"
        bash "$install_dir/miniconda_installer.sh" -b -p "$conda_root"
        rm -rf "$install_dir/miniconda_installer.sh"
    fi
    echo "Miniconda is installed at $conda_root"

    # test conda
    echo "Conda version: "
    "$conda_root/bin/conda" --version || {
        echo "Conda not found. Will exit now..."
        exit 1
    }
}

function create_conda_env() {
    # Create the conda environment at $env_dir with the python version given
    # as $1, unless the directory already exists. On failure the (possibly
    # partial) environment directory is removed and the script exits with 1.
    local python_version="${1}"

    if [ ! -d "${env_dir}" ]; then
        echo "Creating conda environment with python=$python_version in $env_dir"
        "${conda_root}/bin/conda" create -y -k --prefix "$env_dir" python="$python_version" || {
            echo "Failed to create conda environment."
            echo "Will delete the ${env_dir} (if exist) and exit now..."
            # quoted so rm only ever receives the environment path itself
            rm -rf -- "$env_dir"
            exit 1
        }
    else
        echo "Conda environment exists at $env_dir"
    fi
}

function activate_conda_env() {
    # Activate the conda environment at $env_dir; exits the script with status
    # 1 when the environment has no python binary or cannot be activated.

    # leave up to three nested environments, stopping at the first failure
    local attempt
    for attempt in 1 2 3; do
        conda deactivate || break
    done 2>/dev/null

    # a missing interpreter means the environment creation was interrupted
    if ! [ -f "$env_dir/bin/python" ]; then
        echo "Conda environment appears to be broken. You may need to remove $env_dir and run the installer again."
        exit 1
    fi

    # load conda's shell integration before calling "conda activate"
    source "$conda_root/etc/profile.d/conda.sh"
    if ! conda activate "$env_dir"; then
        echo "Failed to activate environment. Please remove $env_dir and run the installer again."
        exit 1
    fi
    echo "Activate conda environment at $CONDA_PREFIX"
}

function deactivate_conda_env() {
    # Deactivate conda only when the active environment is the one at $env_dir.
    if [[ "$CONDA_PREFIX" != "$env_dir" ]]; then
        return 0
    fi

    conda deactivate
    echo "Deactivate conda environment at $env_dir"
}

function install_dependencies() {
    # Install the kotaemon packages into the active environment unless
    # 'pip list' already shows them. Installs from the local sources when a
    # pyproject.toml is present in the working directory, otherwise from the
    # GitHub repository at the version read from the VERSION file ("latest"
    # when that file is missing).
    # After a fresh install the user is asked whether to launch the UI; any
    # answer other than yes/y/true (case-insensitive) exits the script with 0.
    # Exits with status 1 when the packages are still missing afterwards.

    # check if the env is already setup by finding 'kotaemon' in 'pip list'
    if pip list 2>/dev/null | grep -q "kotaemon"; then
        echo "Requirements are already installed"
    else
        local kotaemon_root="$(pwd)/libs/kotaemon"
        local ktem_root="$(pwd)/libs/ktem/"

        if [ -f "$(pwd)/VERSION" ]; then
            local app_version=$(<"$(pwd)/VERSION")
        else
            local app_version="latest"
        fi

        if [ -f "pyproject.toml" ]; then
            echo "Found pyproject.toml. Installing from source"
            echo "" && echo "Installing libs/kotaemon"
            python -m pip install -e "$kotaemon_root"
            echo "" && echo "Installing libs/ktem"
            python -m pip install -e "$ktem_root"

            python -m pip install --no-deps -e .
        else
            echo "Installing Kotaemon $app_version"
            # Work around for versioning control
            python -m pip install "git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/kotaemon"
            python -m pip install "git+https://github.com/Cinnamon/kotaemon.git@$app_version#subdirectory=libs/ktem"
            python -m pip install --no-deps "git+https://github.com/Cinnamon/kotaemon.git@$app_version"
        fi

        if ! pip list 2>/dev/null | grep -q "kotaemon"; then
            echo "Installation failed. You may need to run the installer again."
            deactivate_conda_env
            exit 1
        else
            print_highlight "Install finished successfully. Clear cache..."
            "$conda_root/bin/conda" clean --all -y
            python -m pip cache purge

            print_highlight "Do you want to launch the web UI? [Y/N]"
            local launch
            read -r -p "Input (yes/no)> " launch
            # Convert user input to lowercase. tr is used because ${launch:l}
            # is zsh syntax (in bash it is a substring expansion that leaves
            # the value unchanged) and ${launch,,} needs bash 4 or newer.
            launch=$(printf '%s' "$launch" | tr '[:upper:]' '[:lower:]')
            if [[ "$launch" != "yes" && "$launch" != "y" && "$launch" != "true" ]]; then
                echo "Will exit now..."
                deactivate_conda_env
                echo "Please run the installer again to launch the UI."
                exit 0
            fi
        fi
    fi
}

function setup_local_model() {
    # Run scripts/serve_local.py (relative to the current working directory)
    # with the python found on PATH.
    # The path is quoted so it is always passed to python as a single argument.
    python "$(pwd)/scripts/serve_local.py"
}

function download_and_unzip() {
    # Download a ZIP archive and extract it into a directory.
    #   $1 - URL of the ZIP archive
    #   $2 - destination directory
    # Does nothing when the destination directory already exists.
    # Returns 1 and removes the destination directory when the download or the
    # extraction fails, so that a later run retries instead of mistaking the
    # incomplete directory for a finished download.
    local url=$1
    local dest_dir=$2

    # Skip the whole step if the destination directory already exists
    if [ -d "$dest_dir" ]; then
        echo "Destination directory $dest_dir already exists. Skipping download."
        return
    fi

    mkdir -p "$dest_dir"

    # Download the ZIP file
    local zip_file="${dest_dir}/downloaded.zip"
    echo "Downloading $url to $zip_file"
    # -f makes curl fail on HTTP errors instead of saving the error page
    if ! curl -fL -o "$zip_file" "$url"; then
        echo "Failed to download $url"
        rm -rf -- "$dest_dir"
        return 1
    fi

    # Unzip the file to the destination directory
    echo "Unzipping $zip_file to $dest_dir"
    unzip -o "$zip_file" -d "$dest_dir"
    local unzip_status=$?
    # unzip exits with 1 for warnings only; anything above that is a failure
    if [ "$unzip_status" -gt 1 ]; then
        echo "Failed to unzip $zip_file"
        rm -rf -- "$dest_dir"
        return 1
    fi

    # Clean up the downloaded ZIP file
    rm "$zip_file"
    echo "Download and unzip completed successfully."
}

function launch_ui() {
    # Start app.py from the current working directory with PDFJS_PREBUILT_DIR
    # set to the directory given as $1. Exits the script with status 1 when
    # the app terminates with a non-zero status.
    local pdfjs_prebuilt_dir=$1
    # The script path is quoted so it is always passed as a single argument.
    PDFJS_PREBUILT_DIR="$pdfjs_prebuilt_dir" python "$(pwd)/app.py" || {
        echo "" && echo "Will exit now..."
        exit 1
    }
}

function print_highlight() {
    # Print the message given as $1 framed by two banner lines of asterisks.
    local message="${1}"
    echo "" && echo "******************************************************"
    # Quoted so the message is printed verbatim; unquoted, characters such as
    # '?' or '[Y/N]' are glob patterns that can expand to matching file names.
    echo "$message"
    echo "******************************************************" && echo ""
}

# Main script execution

# move to the parent of the (physical) directory where this script resides;
# stop if that fails, because every path below is derived from the working
# directory
cd "$(
    cd -- "$(dirname "$0")" >/dev/null 2>&1
    pwd -P
)" && cd .. || {
    echo "Cannot change to the application directory. Will exit now..."
    exit 1
}

# locations of the bundled Miniconda and of the app's conda environment
install_dir="$(pwd)/install_dir"
conda_root="${install_dir}/conda"
env_dir="${install_dir}/env"
python_version="3.10"

# PDF.js release to download and where to unpack it
pdf_js_version="4.0.379"
pdf_js_dist_name="pdfjs-${pdf_js_version}-dist"
pdf_js_dist_url="https://github.com/mozilla/pdf.js/releases/download/v${pdf_js_version}/${pdf_js_dist_name}.zip"
target_pdf_js_dir="$(pwd)/libs/ktem/ktem/assets/prebuilt/${pdf_js_dist_name}"

check_path_for_spaces

print_highlight "Setting up Miniconda"
install_miniconda

print_highlight "Creating conda environment"
create_conda_env "$python_version"
activate_conda_env

print_highlight "Installing requirements"
install_dependencies

print_highlight "Downloading and unzipping PDF.js"
download_and_unzip "$pdf_js_dist_url" "$target_pdf_js_dir"

print_highlight "Setting up a local model"
setup_local_model

print_highlight "Launching Kotaemon in your browser, please wait..."
launch_ui "$target_pdf_js_dir"

deactivate_conda_env

read -r -p "Press enter to continue"


================================================
FILE: scripts/run_windows.bat
================================================
@ECHO off

:: Main script execution
:: Work from the parent directory of the folder that contains this script
CD /D "%~dp0\.."

:: Read the app version from the VERSION file; use "latest" when that fails
SET /p app_version=<"%CD%\VERSION" || SET app_version=latest
:: Everything this installer creates lives under install_dir
SET install_dir=%CD%\install_dir
SET conda_root=%install_dir%\conda
SET env_dir=%install_dir%\env
SET python_version=3.10
SET miniconda_download_url=https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe

SET git_install_dir=%install_dir%\Git
SET seven_zip_dir=%install_dir%\7zip
:: Determine if the machine is 32-bit or 64-bit
IF "%PROCESSOR_ARCHITECTURE%"=="x86" (
    SET seven_zip_url=https://7-zip.org/a/7z2408.exe
    SET git_download_url=https://github.com/git-for-windows/git/releases/download/v2.46.0.windows.1/PortableGit-2.46.0-32-bit.7z.exe
) ELSE (
    SET seven_zip_url=https://7-zip.org/a/7z2408-x64.exe
    SET git_download_url=https://github.com/git-for-windows/git/releases/download/v2.46.0.windows.1/PortableGit-2.46.0-64-bit.7z.exe
)

:: Refuse to run from a path that contains a space
ECHO %CD%| FINDSTR /C:" " >nul 2>&1
IF %ERRORLEVEL% EQU 0 (
    ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.
    GOTO :end
)

IF NOT EXIST "%install_dir%" ( MKDIR "%install_dir%" )

:: Each step below is a subroutine; a non-zero error code aborts the script
CALL :print_highlight "Setting up Git"
CALL :download_and_install_git
IF ERRORLEVEL 1 GOTO :end

:: Temporarily add Portable Git to PATH
SET "PATH=%git_install_dir%\bin;%PATH%"

CALL :print_highlight "Setting up Miniconda"
CALL :download_and_install_miniconda
IF ERRORLEVEL 1 GOTO :end

CALL :print_highlight "Creating conda environment"
CALL :create_conda_environment
IF ERRORLEVEL 1 GOTO :end

CALL :activate_environment
IF ERRORLEVEL 1 GOTO :end

CALL :print_highlight "Installing Kotaemon"
CALL :install_dependencies
IF ERRORLEVEL 1 GOTO :end

CALL :print_highlight "Setting up a local model"
CALL :setup_local_model
IF ERRORLEVEL 1 GOTO :end

CALL :print_highlight "Downloading and extracting PDF.js"
CALL :download_and_extract_pdf_js
IF ERRORLEVEL 1 GOTO :end

CALL :print_highlight "Launching Kotaemon in your browser, please wait..."
CALL :launch_ui

CALL :deactivate_environment
GOTO :end_success

:download_and_install_7zip
:: Install 7-Zip into %seven_zip_dir% unless 7z.exe is already present there.
:: Returns error code 1 when the installer cannot be downloaded.
:: Check if 7-Zip is installed
IF NOT EXIST "%seven_zip_dir%\7z.exe" (
    ECHO Downloading 7-Zip from %seven_zip_url%
    CALL curl -Lk "%seven_zip_url%" -o "%install_dir%\7zip_installer.exe" || (
        ECHO. && ECHO Failed to download 7-Zip. Aborting...
        GOTO :exit_func_with_error
    )
    ECHO Installing 7-Zip to %seven_zip_dir%
    CALL "%install_dir%\7zip_installer.exe" /S /D=%seven_zip_dir%
    DEL "%install_dir%\7zip_installer.exe"
)
ECHO 7-Zip is installed at %seven_zip_dir%

GOTO :eof

:uninstall_7zip
:: Remove 7-Zip from %seven_zip_dir%: run its own uninstaller when there is
:: one, otherwise delete the directory.
IF EXIST "%seven_zip_dir%\Uninstall.exe" (
    CALL "%seven_zip_dir%\Uninstall.exe" /S
) ELSE (
    ECHO. && ECHO Uninstaller not found. Manually deleting 7-Zip directory...
    RMDIR /S /Q "%seven_zip_dir%"
)

GOTO :eof

:download_and_install_git
:: Install Portable Git into %git_install_dir% unless a working git.exe is
:: already there, then put its bin folder on PATH for this session.
:: 7-Zip is installed temporarily to extract the Portable Git archive.
:: Returns error code 1 on any failure. Failures leave through
:: :exit_func_with_error so that only the main routine runs the final
:: pause-and-exit sequence.
:: Check if Git is already installed
CALL "%git_install_dir%\bin\git.exe" --version >nul 2>&1
IF %ERRORLEVEL% NEQ 0 (
    ECHO Install 7-Zip to extract Portable Git. It will be uninstalled automatically after Git installation. && ECHO.
    CALL :download_and_install_7zip
    IF ERRORLEVEL 1 GOTO :exit_func_with_error

    ECHO. && ECHO Downloading Portable Git from %git_download_url%
    CALL curl -Lk "%git_download_url%" -o "%install_dir%\portable_git.7z.exe" || (
        ECHO. && ECHO Failed to download Git. Aborting...
        GOTO :exit_func_with_error
    )

    ECHO Extracting Git to %git_install_dir%...
    CALL "%seven_zip_dir%\7z.exe" x "%install_dir%\portable_git.7z.exe" -o"%git_install_dir%" -y >nul || (
        ECHO. && ECHO Failed to extract Git. Aborting...
        GOTO :exit_func_with_error
    )
    DEL "%install_dir%\portable_git.7z.exe"

    ECHO. && ECHO Uninstalling 7-Zip...
    CALL :uninstall_7zip
    IF ERRORLEVEL 1 GOTO :exit_func_with_error
)
ECHO Git is installed at %git_install_dir%
:: Recheck Git installation
CALL "%git_install_dir%\bin\git.exe" --version || (
    ECHO. && ECHO Git not found. Aborting...
    GOTO :exit_func_with_error
)

SET "PATH=%git_install_dir%\bin;%PATH%"
ECHO Git is added to PATH for this session

GOTO :eof

:download_and_install_miniconda
:: Install Miniconda into %conda_root% with a silent installer run.
:: Returns error code 1 when the download fails or conda cannot be run
:: afterwards.
:: If conda has been installed at the %conda_root%, don't need to reinstall it
CALL "%conda_root%\_conda.exe" --version >nul 2>&1
IF %ERRORLEVEL% NEQ 0 (
    IF NOT EXIST "%install_dir%\miniconda_installer.exe" (
        ECHO Downloading Miniconda from %miniconda_download_url%
        CALL curl -Lk "%miniconda_download_url%" -o "%install_dir%\miniconda_installer.exe" || (
            ECHO. && ECHO Failed to download Miniconda. Aborting...
            GOTO :exit_func_with_error
        )
    )
    ECHO Installing Miniconda to %conda_root%
    START /wait "" "%install_dir%\miniconda_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%conda_root%
    DEL "%install_dir%\miniconda_installer.exe"
)
ECHO Conda is installed at %conda_root%

:: Recheck conda
ECHO Conda version:
CALL "%conda_root%\_conda.exe" --version || ( ECHO. && ECHO Conda not found. Aborting... && GOTO :exit_func_with_error )

GOTO :eof

:create_conda_environment
:: Create the conda environment at %env_dir% with python=%python_version%.
:: Returns error code 1 (after deleting %env_dir%) when the creation fails.
:: Create new conda environment if it doesn't exist
IF NOT EXIST %env_dir% (
    ECHO Creating conda environment with python=%python_version% in %env_dir%
    :: Create conda environment. If the interruption happens, rollback and remove the env_dir
    CALL "%conda_root%\_conda.exe" create --no-shortcuts -y -k --prefix %env_dir% python=%python_version% || (
        ECHO. && ECHO Failed to create conda environment. Will delete the %env_dir% and abort now...
        RMDIR /s /q %env_dir%
        GOTO :exit_func_with_error
    )
    ECHO Conda environment created successfully
) ELSE (
    ECHO Conda environment exists at %env_dir%
)
GOTO :eof

:activate_environment
:: Activate the conda environment at %env_dir%.
:: Returns error code 1 when the environment's python.exe cannot be run or
:: the activation fails.
:: Deactivate existing conda env(s) to avoid conflicts
IF EXIST "%conda_root%\condabin\conda.bat" (
    CALL "%conda_root%\condabin\conda.bat" deactivate
    CALL "%conda_root%\condabin\conda.bat" deactivate
    CALL "%conda_root%\condabin\conda.bat" deactivate
)

:: A python.exe that cannot report its version marks the environment as broken
CALL "%env_dir%\python.exe" --version >nul 2>&1 || (
    ECHO The environment appears to be broken. You may need to remove %env_dir% and run the installer again.
    GOTO :exit_func_with_error
)

CALL "%conda_root%\condabin\conda.bat" activate %env_dir% || (
    ECHO Failed to activate environment. You may need to remove %env_dir% and run the installer again.
    GOTO :exit_func_with_error
)
ECHO Activate conda environment at %env_dir%

GOTO :eof

:deactivate_environment
:: Leave the conda environment, but only when the active one (CONDA_PREFIX)
:: is the environment at %env_dir%.
:: Conda deactivate if we are in the right env
IF "%CONDA_PREFIX%" == "%env_dir%" (
    CALL "%conda_root%\condabin\conda.bat" deactivate
    ECHO Deactivate conda environment at %env_dir%
)
GOTO :eof

:install_dependencies
:: Install the kotaemon packages into the active environment unless
:: "pip list" already shows them. Installs from the local sources when
:: pyproject.toml exists in the working directory, otherwise from the GitHub
:: repository at %app_version%.
:: Returns error code 1 when the packages are still missing afterwards.
pip list | findstr /C:"kotaemon" >NUL 2>&1
IF %ERRORLEVEL% == 0  (
    ECHO Dependencies are already installed
) ELSE (
    IF EXIST "pyproject.toml" (
        ECHO Found pyproject.toml. Installing from source...

        ECHO Installing libs\kotaemon
        python -m pip install -e "%CD%\libs\kotaemon"

        ECHO Installing libs\ktem
        python -m pip install -e "%CD%\libs\ktem"

        python -m pip install --no-deps -e .
    ) ELSE (
        ECHO Installing Kotaemon %app_version%
        @REM Work around for versioning control
        python -m pip install git+https://github.com/Cinnamon/kotaemon.git@"%app_version%"#subdirectory=libs/kotaemon
        python -m pip install git+https://github.com/Cinnamon/kotaemon.git@"%app_version%"#subdirectory=libs/ktem
        python -m pip install --no-deps git+https://github.com/Cinnamon/kotaemon.git@"%app_version%"
    )

    ( CALL pip list | findstr /C:"kotaemon" >NUL 2>&1 ) || (
        ECHO. && ECHO Installation failed. You may need to run the installer again.
        CALL :deactivate_environment
        GOTO :exit_func_with_error
    )

    CALL :print_highlight "Install successfully. Clear cache..."
    @REM conda.bat is a batch file: without CALL control would not come back here
    CALL "%conda_root%\condabin\conda.bat" clean --all -y
    python -m pip cache purge
)
GOTO :eof

:download_and_extract_pdf_js
:: Download the prebuilt PDF.js distribution and extract it into
:: libs\ktem\ktem\assets\prebuilt under the current directory.
:: Sets target_pdf_js_dir, which :launch_ui reads afterwards.
:: Does nothing more when the target directory already exists.
:: Returns error code 1 and removes the target directory when the download or
:: the extraction fails, so that the next run retries instead of skipping.

REM Define variables
set "pdf_js_version=4.0.379"
set "pdf_js_dist_name=pdfjs-%pdf_js_version%-dist"
set "pdf_js_dist_url=https://github.com/mozilla/pdf.js/releases/download/v%pdf_js_version%/%pdf_js_dist_name%.zip"
for /f "delims=" %%i in ('cd') do set "current_dir=%%i"
set "target_pdf_js_dir=%current_dir%\libs\ktem\ktem\assets\prebuilt\%pdf_js_dist_name%"

REM Create the target directory if it does not exist (including parent folders)
if not exist "%target_pdf_js_dir%" (
    echo Creating directory %target_pdf_js_dir%
    mkdir "%target_pdf_js_dir%"
) else (
    echo Directory already exists: %target_pdf_js_dir%
    GOTO :eof
)

REM Download the ZIP file using PowerShell
set "zip_file=%temp%\downloaded.zip"
echo Downloading %pdf_js_dist_url% to %zip_file%
powershell -Command "Invoke-WebRequest -Uri '%pdf_js_dist_url%' -OutFile '%zip_file%'" || (
    echo Failed to download PDF.js. Aborting...
    rmdir /s /q "%target_pdf_js_dir%"
    GOTO :exit_func_with_error
)

REM Extract the ZIP file using PowerShell
echo Extracting %zip_file% to %target_pdf_js_dir%
powershell -Command "Expand-Archive -Path '%zip_file%' -DestinationPath '%target_pdf_js_dir%'" || (
    echo Failed to extract PDF.js. Aborting...
    del "%zip_file%"
    rmdir /s /q "%target_pdf_js_dir%"
    GOTO :exit_func_with_error
)

REM Clean up the downloaded ZIP file
del "%zip_file%"
echo Download and extraction completed successfully.

goto :eof

:setup_local_model
:: Run scripts\serve_local.py from the current directory with the python on PATH
python "%CD%\scripts\serve_local.py"
GOTO :eof

:launch_ui
:: Start app.py with python in UTF-8 mode (-Xutf8).
:: Reads target_pdf_js_dir and exports it to the app as PDFJS_PREBUILT_DIR.
:: Returns error code 1 when the app exits with an error.
:: Workaround for diskcache path with folder start with .
SET THEFLOW_TEMP_PATH=flow_tmp
SET PDFJS_PREBUILT_DIR=%target_pdf_js_dir%
ECHO Starting Kotaemon UI... (prebuilt PDF.js is at %PDFJS_PREBUILT_DIR%)
CALL python -Xutf8 "%CD%\app.py" || ( ECHO. && ECHO Will exit now... && GOTO :exit_func_with_error )
GOTO :eof

:print_highlight
:: Print the first argument (surrounding quotes removed by %~1) framed by two
:: banner lines of asterisks.
ECHO. && ECHO ******************************************************
ECHO %~1
ECHO ****************************************************** && ECHO.
GOTO :eof

:exit_func_with_error
:: Called inside functions when error happens, then back to the main routine with error code 1
:: (EXIT /B leaves only the current CALL, not the whole script)
EXIT /B 1

:end_success
:: Exit the script main routine with error code 0 (success)
:: PAUSE waits for a key press before the script ends
ECHO Script completed successfully.
PAUSE
EXIT /B 0

:end
:: Exit the script main routine with error code 1 (fail)
:: PAUSE waits for a key press before the script ends
PAUSE
EXIT /B 1


================================================
FILE: scripts/serve_local.py
================================================
import platform
import subprocess
from inspect import currentframe, getframeinfo
from pathlib import Path

from decouple import config

# Name of the host operating system as reported by platform.system();
# used below to choose the matching server launch script.
system_name = platform.system()

# Locate this file through the current frame so that the launch scripts,
# which are looked up in the same directory, can be found.
cur_frame = currentframe()
if cur_frame is None:
    raise ValueError("Cannot get the current frame.")
this_file = getframeinfo(cur_frame).filename
this_dir = Path(this_file).parent


def serve_llamacpp_python(local_model_file: Path, **kwargs):
    """Start the llama-cpp server launch script for a local model.

    The platform-specific launch script located next to this file is started
    in the background through the shell; this function does not wait for it.

    Args:
        local_model_file: Path of the model file, passed as ``--model``.
        **kwargs: Extra options forwarded as ``--<key> <value>``. ``port``
            defaults to 31415. When the model name suggests a known chat
            format, ``chat_format`` is set to it, overriding any value given.

    Raises:
        ValueError: If the current operating system is not supported.
    """
    import shlex

    def guess_chat_format(local_model_file):
        # Compare case-insensitively so that names such as "Qwen-7B-Chat"
        # are recognized as well.
        model_name = local_model_file.stem.lower()

        # handle known cases that the server backends handle incorrectly
        # this is highly heuristic, should be expand later
        # server backends usually has logic for this but they could still be wrong
        if "qwen" in model_name:
            return "qwen"

        return None

    # default port
    if "port" not in kwargs:
        kwargs["port"] = 31415

    chat_format = guess_chat_format(local_model_file)
    if chat_format:
        kwargs = {**kwargs, "chat_format": chat_format}

    # these scripts create a separate conda env and run the server
    if system_name == "Windows":
        script_file = this_dir / "server_llamacpp_windows.bat"
    elif system_name == "Linux":
        script_file = this_dir / "server_llamacpp_linux.sh"
    elif system_name == "Darwin":
        script_file = this_dir / "server_llamacpp_macos.sh"
    else:
        raise ValueError(f"Unsupported system: {system_name}")

    # Keep every argument as a separate item so that paths or values that
    # contain spaces or shell metacharacters reach the script intact.
    cmd_parts = [str(script_file), "--model", str(local_model_file)]
    for key, value in kwargs.items():
        cmd_parts += [f"--{key}", str(value)]

    if system_name == "Windows":
        # With shell=True on Windows, Popen turns the sequence into a properly
        # quoted command line itself.
        subprocess.Popen(cmd_parts, shell=True)
    else:
        # A POSIX shell needs a single string; shlex.join quotes each item.
        subprocess.Popen(shlex.join(cmd_parts), shell=True)


def main():
    """Offer to serve the model configured as ``LOCAL_MODEL``.

    Returns without starting anything when the setting is empty, when the
    configured file does not exist, or when the user does not answer
    ``y``/``yes`` at the prompt.

    Raises:
        ValueError: If the model file does not have the ``.gguf`` suffix.
    """
    configured_path = config("LOCAL_MODEL", default="")
    if not configured_path:
        print("LOCAL_MODEL not set in the `.env` file.")
        return

    model_path = Path(configured_path)
    if not model_path.exists():
        print(f"Local model not found: {model_path}")
        return

    print(f"Local model found: {model_path}")
    answer = input("Do you want to use this local model ? (y/n): ")
    accepted = answer.lower().strip() in ["y", "yes"]
    if not accepted:
        return

    print("Starting the local server...")
    # only gguf model files are supported
    if model_path.suffix != ".gguf":
        raise ValueError(f"Unsupported model file type: {model_path.suffix}")
    serve_llamacpp_python(model_path)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/server_llamacpp_linux.sh
================================================
#!/bin/bash

# functions used in the main code execution
function print_highlight() {
    # Print the message given as $1 framed by two banner lines of asterisks.
    local message="${1}"
    echo "" && echo "******************************************************"
    # Quoted so the message is printed verbatim; unquoted, the text would be
    # word-split and any glob characters in it expanded.
    echo "$message"
    echo "******************************************************" && echo ""
}

function path_sanity_check() {
    # Abort the script with status 1 when the working directory path contains
    # a space character.
    echo "Path sanity checking"
    case "$PWD" in
    *" "*)
        print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
        exit 1
        ;;
    esac
}

function deactivate_environment() {
    # Leave up to three nested conda environments, stopping at the first
    # "conda deactivate" that fails; error output is discarded.
    echo "Deactivate existing environment(s)"
    {
        if conda deactivate; then
            if conda deactivate; then
                conda deactivate
            fi
        fi
    } 2>/dev/null
}

function check_conda_existence() {
    # Verify that the conda binary under $CONDA_ROOT_PREFIX can be run.
    # Sets the global conda_exists to "T" or "F" and exits the script with
    # status 1 when conda is not usable.
    echo "Check for conda existence"
    conda_exists="F"

    # figure out whether conda exists
    if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi

    # verify if conda is installed by the main app, if not then raise error
    if [ "$conda_exists" == "F" ]; then
        print_highlight "conda is not installed, seems like the app wasn't installed correctly."
        # explicit failure status: a bare "exit" would report the success of
        # the message printed just above
        exit 1
    fi
}

function create_conda_environment() {
    # Create the conda environment at $INSTALL_ENV_DIR (python version taken
    # from $PYTHON_VERSION) when nothing exists at that path yet, then make
    # sure it contains a python binary. Exits with status 1 otherwise.
    if ! [ -e "$INSTALL_ENV_DIR" ]; then
        echo "Create conda environment"
        if ! "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION"; then
            echo && print_highlight "Conda environment creation failed." && exit 1
        fi
    fi

    # the interpreter is the proof that the environment was really created
    if [ -e "$INSTALL_ENV_DIR/bin/python" ]; then
        return
    fi
    print_highlight "Conda environment was not correctly created."
    exit 1
}

function isolate_environment() {
    # Clear PYTHONPATH and PYTHONHOME and export PYTHONNOUSERSITE=1 for the
    # commands that follow.
    echo "Isolate environment"
    unset PYTHONPATH PYTHONHOME
    export PYTHONNOUSERSITE=1
}

function activate_environment() {
    # Activate the conda environment at $INSTALL_ENV_DIR. Exits the script
    # with status 1 when the activation fails, so that the following pip
    # install never runs against a different python.
    echo "Activate conda environment"
    source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
    conda activate "$INSTALL_ENV_DIR" || {
        print_highlight "Failed to activate the conda environment at $INSTALL_ENV_DIR."
        exit 1
    }
}

# main code execution

# work from the parent of the directory that contains this script; stop if
# that fails, because the paths below are derived from the working directory
cd "$(dirname "${BASH_SOURCE[0]}")/.." || {
    echo "Cannot change to the application directory."
    exit 1
}
echo "Changed the current directory to: $(pwd)"

path_sanity_check
deactivate_environment

# config
ENV_NAME="llama-cpp-python-server"
PYTHON_VERSION="3.10"
CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"

check_conda_existence
create_conda_environment
isolate_environment
activate_environment

# install dependencies
# ver 0.2.56 produces segment error for /embeddings on MacOS
# (quoted so the shell does not treat "[server]" as a glob pattern)
python -m pip install "llama-cpp-python[server]==0.2.55"

# start the server with passed params
# ("$@" keeps every argument intact, including ones that contain spaces)
python -m llama_cpp.server "$@"

conda deactivate


================================================
FILE: scripts/server_llamacpp_macos.sh
================================================
#!/bin/bash

# functions used in the main code execution
function print_highlight() {
    # Print the message given as $1 framed by two banner lines of asterisks.
    local message="${1}"
    echo "" && echo "******************************************************"
    # Quoted so the message is printed verbatim; unquoted, the text would be
    # word-split and any glob characters in it expanded.
    echo "$message"
    echo "******************************************************" && echo ""
}

function path_sanity_check() {
    # Abort the script with status 1 when the working directory path contains
    # a space character.
    echo "Path sanity checking"
    case "$(pwd)" in
    *" "*)
        print_highlight "This script relies on Miniconda which can't be silently installed under a path with spaces. Please run it from a path without spaces."
        exit 1
        ;;
    esac
}

function deactivate_environment() {
    # Leave up to three nested conda environments, stopping at the first
    # "conda deactivate" that fails; error output is discarded.
    echo "Deactivate existing environment(s)"
    {
        if conda deactivate; then
            if conda deactivate; then
                conda deactivate
            fi
        fi
    } 2>/dev/null
}

function check_conda_existence() {
    # Verify that the conda binary under $CONDA_ROOT_PREFIX can be run.
    # Sets the global conda_exists to "T" or "F" and exits the script with
    # status 1 when conda is not usable.
    echo "Check for conda existence"
    conda_exists="F"

    # figure out whether conda exists
    if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi

    # verify if conda is installed by the main app, if not then raise error
    if [ "$conda_exists" == "F" ]; then
        print_highlight "conda is not installed, seems like the app wasn't installed correctly."
        # explicit failure status: a bare "exit" would report the success of
        # the message printed just above
        exit 1
    fi
}

function create_conda_environment() {
    # Create the conda environment at $INSTALL_ENV_DIR (python version taken
    # from $PYTHON_VERSION) when that directory does not exist yet, then make
    # sure it contains a python binary. Exits with status 1 otherwise.

    # create the environment if needed
    if [ ! -d "${INSTALL_ENV_DIR}" ]; then
        echo "Create conda environment"
        # Braces, not parentheses: "exit" inside ( ... ) would only leave a
        # subshell and the script would carry on.
        "${CONDA_ROOT_PREFIX}/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python="$PYTHON_VERSION" || {
            echo && print_highlight "Conda environment creation failed."
            exit 1
        }
    fi

    # check if conda environment was actually created
    if [ ! -f "$INSTALL_ENV_DIR/bin/python" ]; then
        print_highlight "Conda environment was not correctly created."
        exit 1
    fi
}

function isolate_environment() {
    # Clear PYTHONPATH and PYTHONHOME and export PYTHONNOUSERSITE=1 for the
    # commands that follow.
    echo "Isolate environment"
    unset PYTHONPATH PYTHONHOME
    export PYTHONNOUSERSITE=1
}

function activate_environment() {
    # Activate the conda environment at $INSTALL_ENV_DIR. Exits the script
    # with status 1 when the activation fails, so that the following pip
    # install never runs against a different python.
    echo "Activate conda environment"
    source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
    conda activate "$INSTALL_ENV_DIR" || {
        print_highlight "Failed to activate the conda environment at $INSTALL_ENV_DIR."
        exit 1
    }
}

# main code execution

# work from the parent of the (physical) directory that contains this script;
# stop if that fails, because the paths below are derived from the working
# directory
cd "$(
    cd -- "$(dirname "$0")" >/dev/null 2>&1
    pwd -P
)" && cd .. || {
    echo "Cannot change to the application directory."
    exit 1
}
echo "Changed the current directory to: $(pwd)"

path_sanity_check
deactivate_environment

# config
ENV_NAME="llama-cpp-python-server"
PYTHON_VERSION="3.10"
CONDA_ROOT_PREFIX="$(pwd)/install_dir/conda"
INSTALL_ENV_DIR="$(pwd)/install_dir/server_envs/${ENV_NAME}"

check_conda_existence
create_conda_environment
isolate_environment
activate_environment

# install dependencies
# ver 0.2.56 produces segment error for /embeddings on MacOS
# (quoted so the shell does not treat "[server]" as a glob pattern)
python -m pip install "llama-cpp-python[server]==0.2.55"

# start the server with passed params
# ("$@" keeps every argument intact, including ones that contain spaces)
python -m llama_cpp.server "$@"

conda deactivate


================================================
FILE: scripts/server_llamacpp_windows.bat
================================================
@echo off

@rem main code execution
@rem Helpers that can fail return a non-zero exit code (exit /b 1) and the
@rem main routine checks it after each call. A "goto :end" issued inside a
@rem CALLed label would only end that call and let the main routine go on.

call :print_highlight "Starting inference server for llama-cpp"

cd /D "%~dp0\.."
echo "Change the current directory to: %cd%"

call :path_sanity_check
if errorlevel 1 goto :end
call :deactivate_environment

@rem config
set ENV_NAME=llama-cpp-python-server
set PYTHON_VERSION=3.10
set CONDA_ROOT_PREFIX=%cd%\install_dir\conda
set INSTALL_ENV_DIR=%cd%\install_dir\server_envs\%ENV_NAME%

echo "Python version: %PYTHON_VERSION%"
echo "Conda prefix: %CONDA_ROOT_PREFIX%"
echo "Environment path: %INSTALL_ENV_DIR%"

@rem handle conda environment
call :check_conda_existence
if errorlevel 1 goto :end
call :create_conda_environment
if errorlevel 1 goto :end
call :isolate_environment
call :activate_environment
if errorlevel 1 goto :end

@rem install dependencies
@rem ver 0.2.56 produces segment error for /embeddings on MacOS
call python -m pip install llama-cpp-python[server]==0.2.55

@rem start the server with passed params
call python -m llama_cpp.server %*
call conda deactivate

goto :end
@rem the end of main code execution


@rem below are the functions used in the above execution


@rem print the first argument framed by two banner lines of asterisks
:print_highlight
echo.
echo ******************************************************
echo %~1
echo ******************************************************
echo.
goto :eof


@rem return 1 when the current directory path contains a space
:path_sanity_check
echo "Path sanity checking"
echo "%cd%"| findstr /C:" " >nul
if not errorlevel 1 (
    call :print_highlight "This script relies on Miniconda which can not be silently installed under a path with spaces."
    exit /b 1
)
exit /b 0


@rem leave up to three nested conda environments; errors are discarded
:deactivate_environment
echo "Deactivate existing environment(s)"
(call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
goto :eof


@rem return 1 when the conda executable under CONDA_ROOT_PREFIX cannot be run
:check_conda_existence
echo "Check for conda existence"
set conda_exists=F

@rem figure out whether conda exists
call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
if "%ERRORLEVEL%" EQU "0" set conda_exists=T

@rem verify if conda is installed by the main app, if not then raise error
if "%conda_exists%" == "F" (
    call :print_highlight "conda is not installed, seems like the app wasn't installed correctly."
    exit /b 1
)
exit /b 0


@rem create the environment at INSTALL_ENV_DIR when missing; return 1 when the
@rem creation fails or the environment has no python.exe afterwards
:create_conda_environment
@rem create the environment if needed
if not exist "%INSTALL_ENV_DIR%" (
    echo "Create conda environment"
    call "%CONDA_ROOT_PREFIX%\_conda.exe" create --no-shortcuts -y -k --prefix "%INSTALL_ENV_DIR%" python="%PYTHON_VERSION%" || (
        echo.
        call :print_highlight "Conda environment creation failed."
        exit /b 1
    )
)

@rem check if conda environment was actually created
if not exist "%INSTALL_ENV_DIR%\python.exe" (
    call :print_highlight "Conda environment was not correctly created."
    exit /b 1
)
exit /b 0


@rem set PYTHONNOUSERSITE and clear PYTHONPATH / PYTHONHOME for this session
:isolate_environment
echo "Isolate environment"
set PYTHONNOUSERSITE=1
set PYTHONPATH=
set PYTHONHOME=
goto :eof


@rem activate the environment at INSTALL_ENV_DIR; return 1 on failure
:activate_environment
echo "Activate conda environment"
call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || (
    echo.
    call :print_highlight "Miniconda hook not found."
    exit /b 1
)
exit /b 0


:end


================================================
FILE: scripts/update_linux.sh
================================================
#!/bin/bash

# functions for better code organization
function check_path_for_spaces() {
    # Abort the script with status 1 when the working directory path contains
    # a space character.
    case "$PWD" in
    *" "*)
        echo "The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later."
        exit 1
        ;;
    esac
}

function activate_conda_env() {
    # Activate the conda environment at $env_dir using the conda installation
    # at $conda_root. Exits the script with status 1 when the environment has
    # no Python interpreter or cannot be activated.

    # drop out of up to three already-active envs first to avoid conflicts
    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null

    # a missing interpreter means the env is broken (e.g. creation was interrupted)
    [ -f "$env_dir/bin/python" ] || {
        echo "Conda environment appears to be broken. You may need to remove $env_dir and run the installer again."
        exit 1
    }

    # load conda's shell integration (conda init) before activating
    . "$conda_root/etc/profile.d/conda.sh"
    if ! conda activate "$env_dir"; then
        echo "Failed to activate environment. Please remove $env_dir and run the installer again"
        exit 1
    fi
    echo "Activate conda environment at $CONDA_PREFIX"
}

function deactivate_conda_env() {
    # Deactivate conda only when the active prefix is this installation's own
    # environment ($env_dir); any other active environment is left untouched.
    [ "$CONDA_PREFIX" == "$env_dir" ] || return 0

    conda deactivate
    echo "Deactivate conda environment at $env_dir"
}

function update_latest() {
    # Update the installed Kotaemon packages to the git ref named by $app_version.
    #
    # Prints the currently installed kotaemon-app version first. When a
    # pyproject.toml is present in the working directory the update is refused
    # (source checkout: the user is asked to git pull instead). Any failure
    # deactivates the conda environment and exits the script with status 1.
    current_version=$(pip list | awk '/kotaemon-app/ {print $2}')
    echo "Current version $current_version"

    if [ -f "pyproject.toml" ]; then
        echo "Source files detected. Please perform git pull manually."
        deactivate_conda_env
        exit 1
    fi

    echo "Installing version: $app_version"
    local repo_url="git+https://github.com/Cinnamon/kotaemon.git@$app_version"

    # Work around for versioning control: install the two libraries from their
    # subdirectories first, then the app itself without dependency resolution.
    # The installs are chained so a failure of ANY step is reported, not just
    # a failure of the last one.
    if ! { python -m pip install "${repo_url}#subdirectory=libs/kotaemon" &&
           python -m pip install "${repo_url}#subdirectory=libs/ktem" &&
           python -m pip install --no-deps "${repo_url}"; }; then
        echo
        echo "Update failed. You may need to run the update again."
        deactivate_conda_env
        exit 1
    fi
}

function print_highlight() {
    # Print the message given as $1 between two rows of asterisks, with a
    # blank line before and after the banner.
    local message="${1}"
    echo "" && echo "******************************************************"
    # printf with a quoted argument keeps the message verbatim: no word
    # splitting, no glob expansion, and no interpretation as echo options.
    printf '%s\n' "$message"
    echo "******************************************************" && echo ""
}

# Main script execution

# move to the parent of the directory where this script resides
# (one level above the script's own directory)
cd "$(dirname "${BASH_SOURCE[0]}")" && cd ..

# git ref to install, and the locations of the conda installation and the
# app environment below the current directory
app_version="latest"
install_dir="$(pwd)/install_dir"
conda_root="${install_dir}/conda"
env_dir="${install_dir}/env"

# refuse to run from a path that contains a space
check_path_for_spaces

print_highlight "Activating conda environment"
activate_conda_env

print_highlight "Updating Kotaemon to latest"
update_latest

deactivate_conda_env

# wait for the user to press enter before the script ends
read -p "Press enter to continue"


================================================
FILE: scripts/update_macos.sh
================================================
#!/bin/bash

# functions for better code organization
function check_path_for_spaces() {
    # Abort the script with status 1 when the working directory path contains
    # a space character; otherwise do nothing.
    case "$PWD" in
        *" "*)
            echo "The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later."
            exit 1
            ;;
    esac
}

function activate_conda_env() {
    # Activate the conda environment at $env_dir using the conda installation
    # at $conda_root. Exits the script with status 1 when the environment has
    # no Python interpreter or cannot be activated.

    # drop out of up to three already-active envs first to avoid conflicts
    { conda deactivate && conda deactivate && conda deactivate; } 2>/dev/null

    # a missing interpreter means the env is broken (e.g. creation was interrupted)
    [ -f "$env_dir/bin/python" ] || {
        echo "Conda environment appears to be broken. You may need to remove $env_dir and run the installer again."
        exit 1
    }

    # load conda's shell integration (conda init) before activating
    . "$conda_root/etc/profile.d/conda.sh"
    if ! conda activate "$env_dir"; then
        echo "Failed to activate environment. Please remove $env_dir and run the installer again"
        exit 1
    fi
    echo "Activate conda environment at $CONDA_PREFIX"
}

function deactivate_conda_env() {
    # Deactivate conda only when the active prefix is this installation's own
    # environment ($env_dir); any other active environment is left untouched.
    [ "$CONDA_PREFIX" == "$env_dir" ] || return 0

    conda deactivate
    echo "Deactivate conda environment at $env_dir"
}

function update_latest() {
    # Update the installed Kotaemon packages to the git ref named by $app_version.
    #
    # Prints the currently installed kotaemon-app version first. When a
    # pyproject.toml is present in the working directory the update is refused
    # (source checkout: the user is asked to git pull instead). Any failure
    # deactivates the conda environment and exits the script with status 1.
    current_version=$(pip list | awk '/kotaemon-app/ {print $2}')
    echo "Current version $current_version"

    if [ -f "pyproject.toml" ]; then
        echo "Source files detected. Please perform git pull manually."
        deactivate_conda_env
        exit 1
    fi

    echo "Installing version: $app_version"
    local repo_url="git+https://github.com/Cinnamon/kotaemon.git@$app_version"

    # Work around for versioning control: install the two libraries from their
    # subdirectories first, then the app itself without dependency resolution.
    # The installs are chained so a failure of ANY step is reported, not just
    # a failure of the last one.
    if ! { python -m pip install "${repo_url}#subdirectory=libs/kotaemon" &&
           python -m pip install "${repo_url}#subdirectory=libs/ktem" &&
           python -m pip install --no-deps "${repo_url}"; }; then
        echo
        echo "Update failed. You may need to run the update again."
        deactivate_conda_env
        exit 1
    fi
}

function print_highlight() {
    # Print the message given as $1 between two rows of asterisks, with a
    # blank line before and after the banner.
    local message="${1}"
    echo "" && echo "******************************************************"
    # printf with a quoted argument keeps the message verbatim: no word
    # splitting, no glob expansion, and no interpretation as echo options.
    printf '%s\n' "$message"
    echo "******************************************************" && echo ""
}

# Main script execution

# move to the parent of the directory where this script resides
# (one level above the script's own directory)
cd "$(dirname "${BASH_SOURCE[0]}")" && cd ..

# git ref to install, and the locations of the conda installation and the
# app environment below the current directory
app_version="latest"
install_dir="$(pwd)/install_dir"
conda_root="${install_dir}/conda"
env_dir="${install_dir}/env"

# refuse to run from a path that contains a space
check_path_for_spaces

print_highlight "Activating conda environment"
activate_conda_env

print_highlight "Updating Kotaemon to latest"
update_latest

deactivate_conda_env

# wait for the user to press enter before the script ends
read -p "Press enter to continue"


================================================
FILE: scripts/update_windows.bat
================================================
@ECHO off

:: Main script execution
:: Work from the parent of the directory that contains this script
CD /D "%~dp0\.."

:: Git ref to install, and the conda and environment locations below the current directory
SET app_version=latest
SET install_dir=%CD%\install_dir
SET conda_root=%install_dir%\conda
SET env_dir=%install_dir%\env

:: Refuse to run when the current directory path contains a space
ECHO %CD%| FINDSTR /C:" " >nul 2>&1
IF %ERRORLEVEL% EQU 0 (
    ECHO The current workdir has whitespace which can lead to unintended behaviour. Please modify your path and continue later.
    GOTO :end
)

:: Each step below is a subroutine that returns a non-zero exit code on failure
CALL :print_highlight "Activating conda environment"
CALL :activate_environment
IF ERRORLEVEL 1 GOTO :end

CALL :print_highlight "Updating Kotaemon to latest"
CALL :update_latest
IF ERRORLEVEL 1 GOTO :end

CALL :deactivate_environment
GOTO :end_success


:activate_environment
:: Subroutine: activate the conda environment at env_dir.
:: Returns exit code 1 through exit_func_with_error when the interpreter check
:: or the activation fails.
:: deactivate existing conda env(s) to avoid conflicts
( CALL conda deactivate && CALL conda deactivate && CALL conda deactivate ) 2> nul

:: the environment is treated as broken when its python.exe cannot report a version
CALL "%env_dir%\python.exe" --version >nul 2>&1 || (
    ECHO The environment appears to be broken. You may need to remove %env_dir% and run the installer again.
    GOTO :exit_func_with_error
)

:: activate through conda.bat found under conda_root
CALL "%conda_root%\condabin\conda.bat" activate %env_dir% || (
    ECHO Failed to activate environment. You may need to remove %env_dir% and run the installer again.
    GOTO :exit_func_with_error
)
ECHO Activate conda environment at %env_dir%

GOTO :eof

:deactivate_environment
:: Subroutine: deactivate conda only when the active prefix equals env_dir,
:: so an unrelated active environment is left untouched.
:: Conda deactivate if we are in the right env
IF "%CONDA_PREFIX%" == "%env_dir%" (
    CALL "%conda_root%\condabin\conda.bat" deactivate
    ECHO Deactivate conda environment at %env_dir%
)
GOTO :eof

:update_latest
:: Subroutine: update the installed Kotaemon packages to the git ref in app_version.
:: Refuses to run when pyproject.toml exists in the current directory and asks
:: for a manual git pull instead. Error paths deactivate the environment and
:: return exit code 1 through exit_func_with_error.
:: Read the installed kotaemon-app version from the pip list output
FOR /F "tokens=1,2" %%a in ('pip list') do if "%%a"=="kotaemon-app" set current_version=%%b
ECHO Current version %current_version%

:: NOTE(review): confirm that the failure handler attached to the block below
:: fires when either of the first two pip installs fails, not only the last one.
IF EXIST "pyproject.toml" (
    ECHO Source files detected. Please perform git pull manually.
    CALL :deactivate_environment
    GOTO :exit_func_with_error
) ELSE (
    ECHO Installing version: %app_version%
    @REM Work around for versioning control
    python -m pip install git+https://github.com/Cinnamon/kotaemon.git@"%app_version%"#subdirectory=libs/kotaemon
    python -m pip install git+https://github.com/Cinnamon/kotaemon.git@"%app_version%"#subdirectory=libs/ktem
    python -m pip install --no-deps git+https://github.com/Cinnamon/kotaemon.git@"%app_version%"
) || (
    ECHO. && ECHO Update failed. You may need to run the update again.
    CALL :deactivate_environment
    GOTO :exit_func_with_error
)

:: Report the version now installed and record it in the VERSION file
CALL :print_highlight "Update successfully."
FOR /F "tokens=1,2" %%a in ('pip list') do if "%%a"=="kotaemon-app" set updated_version=%%b
ECHO Updated version %updated_version%
ECHO %updated_version% > VERSION
GOTO :eof

:print_highlight
:: Subroutine: print the first argument, with its surrounding quotes removed,
:: between two rows of asterisks with a blank line before and after.
ECHO. && ECHO ******************************************************
ECHO %~1
ECHO ****************************************************** && ECHO.
GOTO :eof

:exit_func_with_error
:: Called inside functions when error happens, then back to the main routine with error code 1
:: The main routine checks this code with IF ERRORLEVEL 1 after each CALL.
EXIT /B 1

:end_success
:: Exit the script main routine with error code 0 (success)
:: PAUSE waits for a key press before the script returns.
ECHO Script completed successfully.
PAUSE
EXIT /B 0

:end
:: Exit the script main routine with error code 1 (fail)
:: PAUSE waits for a key press before the script returns.
PAUSE
EXIT /B 1


================================================
FILE: settings.yaml.example
================================================
# This is a sample GraphRAG settings.yaml file that allows users to run the GraphRAG index process with their customized parameters.
# The parameters in this file only take effect when USE_CUSTOMIZED_GRAPHRAG_SETTING is set to true in the .env file.
# For a comprehensive understanding of GraphRAG parameters, please refer to: https://microsoft.github.io/graphrag/config/json_yaml/.

encoding_model: cl100k_base
skip_workflows: []
llm:
  api_key: ${GRAPHRAG_API_KEY}
  type: openai_chat # or azure_openai_chat
  api_base: http://127.0.0.1:11434/v1
  model: qwen2
  model_supports_json: true # recommended if this is available for your model.
  # max_tokens: 4000
  request_timeout: 1800.0
  # api_base: https://<instance>.openai.azure.com
  # api_version: 2024-02-15-preview
  # organization: <organization_id>
  # deployment_name: <azure_model_deployment_name>
  # tokens_per_minute: 150_000 # set a leaky bucket throttle
  # requests_per_minute: 10_000 # set a leaky bucket throttle
  # max_retries: 10
  # max_retry_wait: 10.0
  # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
  concurrent_requests: 5 # the number of parallel inflight requests that may be made
  # temperature: 0 # temperature for sampling
  # top_p: 1 # top-p sampling
  # n: 1 # Number of completions to generate

parallelization:
  stagger: 0.3
  # num_threads: 50 # the number of threads to use for parallel processing

async_mode: threaded # or asyncio

embeddings:
  ## parallelization: override the global parallelization settings for embeddings
  async_mode: threaded # or asyncio
  # target: required # or all
  # batch_size: 16 # the number of documents to send in a single request
  # batch_max_tokens: 8191 # the maximum number of tokens to send in a single request
  llm:
    api_base: http://localhost:11434/v1
    api_key: ${GRAPHRAG_API_KEY}
    model: nomic-embed-text
    type: openai_embedding
    # api_base: https://<instance>.openai.azure.com
    # api_version: 2024-02-15-preview
    # organization: <organization_id>
    # deployment_name: <azure_model_deployment_name>
    # tokens_per_minute: 150_000 # set a leaky bucket throttle
    # requests_per_minute: 10_000 # set a leaky bucket throttle
    # max_retries: 10
    # max_retry_wait: 10.0
    # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
    # concurrent_requests: 25 # the number of parallel inflight requests that may be made

chunks:
  size: 1200
  overlap: 100
  group_by_columns: [id] # by default, we don't allow chunks to cross documents

input:
  type: file # or blob
  file_type: text # or csv
  base_dir: "input"
  file_encoding: utf-8
  file_pattern: ".*\\.txt$"

cache:
  type: file # or blob
  base_dir: "cache"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

storage:
  type: file # or blob
  base_dir: "output"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

reporting:
  type: file # or console, blob
  base_dir: "output"
  # connection_string: <azure_blob_storage_connection_string>
  # container_name: <azure_blob_storage_container_name>

entity_extraction:
  ## strategy: fully override the entity extraction strategy.
  ##   type: one of graph_intelligence, graph_intelligence_json and nltk
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/entity_extraction.txt"
  entity_types: [organization,person,geo,event]
  max_gleanings: 1

summarize_descriptions:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/summarize_descriptions.txt"
  max_length: 500

claim_extraction:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  # enabled: true
  prompt: "prompts/claim_extraction.txt"
  description: "Any claims or facts that could be relevant to information discovery."
  max_gleanings: 1

community_reports:
  ## llm: override the global llm settings for this task
  ## parallelization: override the global parallelization settings for this task
  ## async_mode: override the global async_mode settings for this task
  prompt: "prompts/community_report.txt"
  max_length: 2000
  max_input_length: 8000

cluster_graph:
  max_cluster_size: 10

embed_graph:
  enabled: false # if true, will generate node2vec embeddings for nodes
  # num_walks: 10
  # walk_length: 40
  # window_size: 2
  # iterations: 3
  # random_seed: 597832

umap:
  enabled: false # if true, will generate UMAP embeddings for nodes

snapshots:
  graphml: false
  raw_entities: false
  top_level_nodes: false

local_search:
  # text_unit_prop: 0.5
  # community_prop: 0.1
  # conversation_history_max_turns: 5
  # top_k_mapped_entities: 10
  # top_k_relationships: 10
  # llm_temperature: 0 # temperature for sampling
  # llm_top_p: 1 # top-p sampling
  # llm_n: 1 # Number of completions to generate
  # max_tokens: 12000

global_search:
  # llm_temperature: 0 # temperature for sampling
  # llm_top_p: 1 # top-p sampling
  # llm_n: 1 # Number of completions to generate
  # max_tokens: 12000
  # data_max_tokens: 12000
  # map_max_tokens: 1000
  # reduce_max_tokens: 2000
  # concurrency: 32


================================================
FILE: sso_app.py
================================================
import os

import gradiologin as grlogin
from decouple import config
from fastapi import FastAPI
from fastapi.responses import FileResponse
from theflow.settings import settings as flowsettings

# Application data directory from flowsettings; falls back to the current directory.
KH_APP_DATA_DIR = getattr(flowsettings, "KH_APP_DATA_DIR", ".")
GRADIO_TEMP_DIR = os.getenv("GRADIO_TEMP_DIR", None)
# Selects the login provider: "KEYCLOAK" registers Keycloak below, any other
# value (default "GOOGLE") registers Google.
AUTHENTICATION_METHOD = config("AUTHENTICATION_METHOD", "GOOGLE")

# override GRADIO_TEMP_DIR if it's not set
# (the value is also exported through os.environ before ktem is imported below)
if GRADIO_TEMP_DIR is None:
    GRADIO_TEMP_DIR = os.path.join(KH_APP_DATA_DIR, "gradio_tmp")
    os.environ["GRADIO_TEMP_DIR"] = GRADIO_TEMP_DIR

# for authentication with Google
GOOGLE_CLIENT_ID = config("GOOGLE_CLIENT_ID", default="")
GOOGLE_CLIENT_SECRET = config("GOOGLE_CLIENT_SECRET", default="")

# for authentication with Open ID by keycloak
KEYCLOAK_SERVER_URL = config("KEYCLOAK_SERVER_URL", default="")
KEYCLOAK_REALM = config("KEYCLOAK_REALM", default="")
KEYCLOAK_CLIENT_ID = config("KEYCLOAK_CLIENT_ID", default="")
KEYCLOAK_CLIENT_SECRET = config("KEYCLOAK_CLIENT_SECRET", default="")

from ktem.main import App  # noqa

gradio_app = App()
demo = gradio_app.make()

app = FastAPI()

# Describe the OpenID Connect provider chosen by AUTHENTICATION_METHOD, then
# register it once. Both providers request the same scopes.
if AUTHENTICATION_METHOD == "KEYCLOAK":
    # for authentication with Open ID by keycloak
    _oidc_provider = {
        "name": "keycloak",
        "server_metadata_url": (
            f"{KEYCLOAK_SERVER_URL}/realms/{KEYCLOAK_REALM}/"
            ".well-known/openid-configuration"
        ),
        "client_id": KEYCLOAK_CLIENT_ID,
        "client_secret": KEYCLOAK_CLIENT_SECRET,
    }
else:
    # for authentication with Google
    _oidc_provider = {
        "name": "google",
        "server_metadata_url": (
            "https://accounts.google.com/.well-known/openid-configuration"
        ),
        "client_id": GOOGLE_CLIENT_ID,
        "client_secret": GOOGLE_CLIENT_SECRET,
    }

grlogin.register(
    client_kwargs={"scope": "openid email profile"},
    **_oidc_provider,
)


@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Serve the Gradio app's favicon file at ``/favicon.ico``."""
    icon_path = gradio_app._favicon
    return FileResponse(icon_path)


# Mount the Gradio UI on the FastAPI app under /app via gradiologin, allowing
# it to serve files from the ktem assets directory and the Gradio temp directory.
grlogin.mount_gradio_app(
    app,
    demo,
    "/app",
    allowed_paths=[
        "libs/ktem/ktem/assets",
        GRADIO_TEMP_DIR,
    ],
)


================================================
FILE: sso_app_demo.py
================================================
import os

import gradio as gr
from authlib.integrations.starlette_client import OAuth, OAuthError
from decouple import config
from fastapi import FastAPI, Request
from fastapi.responses import FileResponse
from starlette.config import Config
from starlette.middleware.sessions import SessionMiddleware
from starlette.responses import RedirectResponse
from theflow.settings import settings as flowsettings

# NOTE(review): KH_DEMO_MODE is read here but not referenced again in this file.
KH_DEMO_MODE = getattr(flowsettings, "KH_DEMO_MODE", False)
# Application data directory from flowsettings; falls back to the current directory.
KH_APP_DATA_DIR = getattr(flowsettings, "KH_APP_DATA_DIR", ".")
GRADIO_TEMP_DIR = os.getenv("GRADIO_TEMP_DIR", None)
# override GRADIO_TEMP_DIR if it's not set
# (the value is also exported through os.environ before ktem is imported below)
if GRADIO_TEMP_DIR is None:
    GRADIO_TEMP_DIR = os.path.join(KH_APP_DATA_DIR, "gradio_tmp")
    os.environ["GRADIO_TEMP_DIR"] = GRADIO_TEMP_DIR


# Google OAuth client credentials; empty strings when not configured.
GOOGLE_CLIENT_ID = config("GOOGLE_CLIENT_ID", default="")
GOOGLE_CLIENT_SECRET = config("GOOGLE_CLIENT_SECRET", default="")
# Key handed to SessionMiddleware in add_session_middleware().
# NOTE(review): the fallback value is a fixed, publicly known string; deployments
# should set SECRET_KEY explicitly.
SECRET_KEY = config("SECRET_KEY", default="default-secret-key")


def add_session_middleware(app):
    """Register the Google OAuth client and add session middleware to ``app``.

    Args:
        app: the application object; ``SessionMiddleware`` is added to it with
            ``SECRET_KEY`` as its secret key.

    Returns:
        The ``OAuth`` registry with a client named ``"google"`` registered.
    """
    credentials = Config(
        environ={
            "GOOGLE_CLIENT_ID": GOOGLE_CLIENT_ID,
            "GOOGLE_CLIENT_SECRET": GOOGLE_CLIENT_SECRET,
        }
    )
    registry = OAuth(credentials)

    google_metadata_url = "https://accounts.google.com/.well-known/openid-configuration"
    registry.register(
        name="google",
        server_metadata_url=google_metadata_url,
        client_kwargs={"scope": "openid email profile"},
    )

    app.add_middleware(SessionMiddleware, secret_key=SECRET_KEY)
    return registry


# Imported here, after GRADIO_TEMP_DIR has been exported above, rather than at
# the top of the file (hence the noqa).
from ktem.main import App  # noqa

# Build the Gradio UI once at import time.
gradio_app = App()
main_demo = gradio_app.make()

# FastAPI application hosting the auth routes; `oauth` is the registry used by
# the /login and /auth handlers below.
app = FastAPI()
oauth = add_session_middleware(app)


@app.get("/")
def public(request: Request):
    """Redirect requests for the site root to the Gradio app under ``/app/``."""
    base_url = gr.route_utils.get_root_url(request, "/", None)
    target = f"{base_url}/app/"
    return RedirectResponse(url=target)


@app.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Serve the Gradio app's favicon file at ``/favicon.ico``."""
    icon_path = gradio_app._favicon
    return FileResponse(icon_path)


@app.route("/logout")
async def logout(request: Request):
    """Remove the ``"user"`` entry from the session, if any, and redirect to ``/``."""
    session = request.session
    session.pop("user", None)
    return RedirectResponse(url="/")


@app.route("/login")
async def login(request: Request):
    """Start the Google login flow, using ``<root_url>/auth`` as the redirect URI."""
    base_url = gr.route_utils.get_root_url(request, "/login", None)
    callback_uri = f"{base_url}/auth"
    google = oauth.google
    return await google.authorize_redirect(request, callback_uri)


@app.route("/auth")
async def auth(request: Request):
    """Complete the Google login and store the user info in the session.

    The ``"userinfo"`` entry of the returned token is saved under the session's
    ``"user"`` key. The response is a redirect to ``/`` whether or not the
    token exchange raised ``OAuthError``.
    """
    try:
        token = await oauth.google.authorize_access_token(request)
    except OAuthError:
        # login failed or was cancelled: leave the session untouched
        return RedirectResponse(url="/")
    else:
        request.session["user"] = dict(token)["userinfo"]
    return RedirectResponse(url="/")


# Mount the Gradio UI on the FastAPI app under /app, allowing it to serve
# files from the ktem assets directory and the Gradio temp directory.
app = gr.mount_gradio_app(
    app,
    main_demo,
    path="/app",
    allowed_paths=[
        "libs/ktem/ktem/assets",
        GRADIO_TEMP_DIR,
    ],
)


================================================
FILE: templates/component-default/README.md
================================================


================================================
FILE: templates/project-default/cookiecutter.json
================================================
{
    "project_name": "prj_kotaemon",
    "ptl": "john"
}


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/.gitattributes
================================================
.gitattributes text eol=lf
.gitignore text eol=lf
*.build text eol=lf
*.c text eol=lf
*.cmake text eol=lf
*.cpp text eol=lf
*.csv text eol=lf
*.f text eol=lf
*.f90 text eol=lf
*.for text eol=lf
*.grc text eol=lf
*.h text eol=lf
*.ipynb text eol=lf
*.m text eol=lf
*.md text eol=lf
*.pas text eol=lf
*.py text eol=lf
*.rst text eol=lf
*.sh text eol=lf
*.txt text eol=lf
*.yml text eol=lf
Makefile text eol=lf
*.html linguist-documentation


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/.gitignore
================================================
# Created by https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm
# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm

### Emacs ###
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*

# Org-mode
.org-id-locations
*_archive

# flymake-mode
*_flymake.*

# eshell files
/eshell/history
/eshell/lastdir

# elpa packages
/elpa/

# reftex files
*.rel

# AUCTeX auto folder
/auto/

# cask packages
.cask/
dist/

# Flycheck
flycheck_*.el

# server auth directory
/server/

# projectiles files
.projectile

# directory configuration
.dir-locals.el

# network security
/network-security.data

### Linux ###

# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

# KDE directory preferences
.directory

# Linux trash folder which might appear on any partition or disk
.Trash-*

# .nfs files are created when an open file is removed but is still being accessed
.nfs*

### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

### macOS Patch ###
# iCloud generated files
*.icloud

### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn.  Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721

# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr

# Sonarlint plugin
# https://plugins.jetbrains.com/plugin/7973-sonarlint
.idea/**/sonarlint/

# SonarQube Plugin
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
.idea/**/sonarIssues.xml

# Markdown Navigator plugin
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
.idea/**/markdown-navigator.xml
.idea/**/markdown-navigator-enh.xml
.idea/**/markdown-navigator/

# Cache file creation bug
# See https://youtrack.jetbrains.com/issue/JBR-2257
.idea/$CACHE_FILE$

# CodeStream plugin
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml

# Azure Toolkit for IntelliJ plugin
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
.idea/**/azureSettings.xml

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# LSP config files
pyrightconfig.json

### Vim ###
# Swap
[._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]

# Session
Session.vim
Sessionx.vim

# Temporary
.netrwhist
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~

### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets

# Local History for Visual Studio Code
.history/

# Built Visual Studio Code Extensions
*.vsix

### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide

### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db

# Dump file
*.stackdump

# Folder config file
[Dd]esktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp

# Windows shortcuts
*.lnk

.theflow/

# End of https://www.toptal.com/developers/gitignore/api/python,linux,macos,windows,vim,emacs,visualstudiocode,pycharm

logs/
.gitsecret/keys/random_seed
!*.secret
credentials.txt

S.gpg-agent*
.vscode/settings.json


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/.pre-commit-config.yaml
================================================
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: check-yaml
      - id: check-toml
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: detect-aws-credentials
        args: ["--allow-missing-credentials"]
      - id: detect-private-key
      - id: check-added-large-files
  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
        language_version: python3
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: ["--profile", "black"]
        language_version: python3.10
  - repo: https://github.com/pycqa/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        args: ["--max-line-length", "88", "--extend-ignore", "E203"]
  - repo: https://github.com/myint/autoflake
    rev: v1.4
    hooks:
      - id: autoflake
        args:
          [
            "--in-place",
            "--remove-unused-variables",
            "--remove-all-unused-imports",
            "--ignore-init-module-imports",
            "--exclude=tests/*",
          ]
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v2.7.1
    hooks:
      - id: prettier
        types_or: [markdown, yaml]
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: "v1.5.1"
    hooks:
      - id: mypy
        additional_dependencies: [types-PyYAML==6.0.12.11, "types-requests"]
        args: ["--check-untyped-defs", "--ignore-missing-imports"]


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/README.md
================================================
<div align="center">

# Project {{ cookiecutter.project_name }}

[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/Cinnamon/kotaemon)

</div>

# Install

```bash
# Create new conda env (optional)
conda create -n {{ cookiecutter.project_name }} python=3.10
conda activate {{ cookiecutter.project_name }}

# Clone and install the project
git clone "<{{ cookiecutter.project_name }}-repo>"
cd "<{{ cookiecutter.project_name }}-repo>"
pip install -e .

# Generate the project structure
cd ..
kh start-project
```

# Usage

- Build the pipeline in `pipeline.py`

For supported utilities and tools, refer to: https://github.com/Cinnamon/kotaemon/wiki/Utilities

# Contribute

- For project issues and errors, please report them in this repo's issues.
- For kotaemon issues and errors, please report or make PR fixes in https://github.com/Cinnamon/kotaemon.git
- If the template for this project has issues and errors, please report or make
  PR fixes in https://github.com/Cinnamon/kotaemon/tree/main/templates/project-default


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/setup.py
================================================
import setuptools

# Packaging metadata for the generated project. The cookiecutter placeholders
# inside the string values are substituted when the template is rendered.
_PACKAGE_METADATA = {
    "name": "{{ cookiecutter.project_name }}",
    "version": "0.0.1",
    "author": "{{ cookiecutter.ptl }}",
    "author_email": "{{ cookiecutter.ptl }}@cinnamon.is",
    "description": "Project {{ cookiecutter.project_name }}",
    "long_description": "Project {{ cookiecutter.project_name }}",
    "url": "https://github.com/Cinnamon/kotaemon",
    "python_requires": ">=3",
    "classifiers": [
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # kotaemon is installed straight from its git repository over SSH.
    "install_requires": [
        "kotaemon@git+ssh://git@github.com/Cinnamon/kotaemon.git",
    ],
}

setuptools.setup(**_PACKAGE_METADATA)


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/tests/__init__.py
================================================


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/{{cookiecutter.project_name}}/__init__.py
================================================


================================================
FILE: templates/project-default/{{cookiecutter.project_name}}/{{cookiecutter.project_name}}/pipeline.py
================================================
import os
from typing import List

from kotaemon.base import BaseComponent, Document, LLMInterface, Node, Param, lazy
from kotaemon.contribs.promptui.logs import ResultLog
from kotaemon.embeddings import LCAzureOpenAIEmbeddings
from kotaemon.indices import VectorIndexing, VectorRetrieval
from kotaemon.llms import LCAzureChatOpenAI
from kotaemon.storages import ChromaVectorStore, SimpleFileDocumentStore


class QAResultLog(ResultLog):
    """Result log extended with a getter for the logged prompt."""

    @staticmethod
    def _get_prompt(obj):
        """Return the value stored under the ``"prompt"`` key of ``obj``."""
        prompt = obj["prompt"]
        return prompt


class QuestionAnsweringPipeline(BaseComponent):
    """Answer a question with an LLM, using retrieved documents as context."""

    # Result-log class providing the getters named in `_promptui_outputs`.
    _promptui_resultlog = QAResultLog
    # Output descriptors: one for the constructed prompt (logged at step
    # ".prompt") and one for the final answer (step ".").
    # NOTE(review): presumably consumed by the promptui UI - confirm.
    _promptui_outputs: list = [
        {
            "step": ".prompt",
            "getter": "_get_prompt",
            "component": "text",
            "params": {"label": "Constructed prompt to LLM"},
        },
        {
            "step": ".",
            "getter": "_get_output",
            "component": "text",
            "params": {"label": "Answer"},
        },
    ]

    # Number of documents requested from the retrieval step.
    retrieval_top_k: int = 1

    # Chat model used to generate the answer. Endpoint and deployment name are
    # dummy placeholder values; the API key is read from the OPENAI_API_KEY
    # environment variable, falling back to "default-key".
    llm: LCAzureChatOpenAI = LCAzureChatOpenAI.withx(
        azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
        openai_api_key=os.environ.get("OPENAI_API_KEY", "default-key"),
        openai_api_version="2023-03-15-preview",
        deployment_name="dummy-q2-gpt35",
        temperature=0,
        request_timeout=60,
    )

    # Retrieval step backed by a Chroma vector store at "./tmp" and a file
    # document store at "docstore.json".
    retrieving_pipeline: VectorRetrieval = Node(
        VectorRetrieval.withx(
            vector_store=lazy(ChromaVectorStore).withx(path="./tmp"),
            doc_store=lazy(SimpleFileDocumentStore).withx(path="docstore.json"),
            embedding=LCAzureOpenAIEmbeddings.withx(
                model="text-embedding-ada-002",
                deployment="dummy-q2-text-embedding",
                azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
                openai_api_key=os.environ.get("OPENAI_API_KEY", "default-key"),
            ),
        ),
        ignore_ui=True,
    )

    def run(self, text: str) -> LLMInterface:
        """Answer ``text`` using retrieved documents as context.

        Args:
            text: the question to answer.

        Returns:
            The ``text`` attribute of the LLM's response to the built prompt.
        """
        # NOTE(review): the annotation says LLMInterface, yet the value
        # returned below is the response's `.text` - confirm which is intended.

        # Fetch the best-matching documents and merge their text, one per line.
        retrieved = self.retrieving_pipeline(text, top_k=int(self.retrieval_top_k))
        matched_texts: List[str] = [doc.text for doc in retrieved]
        context = "\n".join(matched_texts)

        # Build the prompt and record it under the ".prompt" step.
        prompt = f'Answer the following question: "{text}". The context is: \n{context}'
        self.log_progress(".prompt", prompt=prompt)

        # Ask the LLM and hand back the answer text.
        response = self.llm(prompt)
        return response.text


class IndexingPipeline(VectorIndexing):
    """Vector indexing pipeline preconfigured with local stores."""

    # Chroma vector store located at "./tmp".
    vector_store: ChromaVectorStore = Param(
        lazy(ChromaVectorStore).withx(path="./tmp"),
        ignore_ui=True,
    )
    # File-based document store located at "docstore.json".
    doc_store: SimpleFileDocumentStore = Param(
        lazy(SimpleFileDocumentStore).withx(path="docstore.json"),
        ignore_ui=True,
    )
    # Embedding model; endpoint and deployment name are dummy placeholder
    # values, and the API key is read from the OPENAI_API_KEY environment
    # variable, falling back to "default-key".
    embedding: LCAzureOpenAIEmbeddings = LCAzureOpenAIEmbeddings.withx(
        model="text-embedding-ada-002",
        deployment="dummy-q2-text-embedding",
        azure_endpoint="https://bleh-dummy-2.openai.azure.com/",
        openai_api_key=os.environ.get("OPENAI_API_KEY", "default-key"),
    )

    def run(self, text: str) -> Document:
        """Index ``text`` and report the size of the vector store.

        The parent indexing step normally returns nothing. For demonstration
        purposes this override returns a ``Document`` wrapping the count
        reported by the vector store's underlying collection.
        """
        super().run(text)

        # Persist the document store locally after every indexing run; this
        # could be skipped when we have a FileDocumentStore.
        if self.doc_store is not None:
            self.doc_store.save("docstore.json")

        # Reads the vector store's private `_collection` attribute.
        indexed_count = self.vector_store._collection.count()
        return Document(indexed_count)