Full Code of BBC-Esq/ChromaDB-Plugin-for-LM-Studio for AI

main d99ae7577a79 cached

90 files

2.8 MB

725.8k tokens

1108 symbols

1 requests

Download .txt

Showing preview only (3,027K chars total). Download the full file or copy to clipboard to get everything.

Repository: BBC-Esq/ChromaDB-Plugin-for-LM-Studio
Branch: main
Commit: d99ae7577a79
Files: 90
Total size: 2.8 MB

Directory structure:
gitextract_gve6w__6/

├── .gitignore
├── Assets/
│   ├── SentenceTransformer.py
│   ├── core.py
│   ├── user_manual_consolidated.md
│   └── vision_model_table.html
├── CSS/
│   └── template.css
├── README.md
├── Tokenizer/
│   ├── special_tokens_map.json
│   ├── tokenizer.json
│   ├── tokenizer.model
│   └── tokenizer_config.json
├── __main__.py
├── charts/
│   ├── __init__.py
│   ├── all_gpus.py
│   ├── gpu_info.py
│   ├── models_chat.py
│   ├── models_vector.py
│   └── models_vision.py
├── chat/
│   ├── __init__.py
│   ├── base.py
│   ├── jeeves.py
│   ├── kobold.py
│   ├── lm_studio.py
│   ├── local_model.py
│   ├── minimax.py
│   └── openai.py
├── core/
│   ├── __init__.py
│   ├── chatgpt_settings.py
│   ├── config.py
│   ├── constants.py
│   ├── extract_metadata.py
│   ├── initialize.py
│   └── utilities.py
├── db/
│   ├── __init__.py
│   ├── choose_documents.py
│   ├── create_symlinks.py
│   ├── cuda_manager.py
│   ├── database_interactions.py
│   ├── document_processor.py
│   ├── embedding_models.py
│   ├── process_manager.py
│   ├── sqlite_operations.py
│   ├── stage_extract.py
│   ├── stage_split.py
│   └── stage_tokenize.py
├── gui/
│   ├── __init__.py
│   ├── credentials.py
│   ├── dialogs/
│   │   ├── __init__.py
│   │   ├── ai_backends_dialog.py
│   │   ├── chatgpt_tab.py
│   │   ├── kobold_tab.py
│   │   ├── lm_studio_tab.py
│   │   └── minimax_tab.py
│   ├── download_model.py
│   ├── main_window.py
│   ├── metrics_bar.py
│   ├── tabs.py
│   ├── tabs_databases/
│   │   ├── __init__.py
│   │   ├── create.py
│   │   ├── manage.py
│   │   └── query.py
│   ├── tabs_models/
│   │   ├── __init__.py
│   │   └── models.py
│   ├── tabs_settings/
│   │   ├── __init__.py
│   │   ├── database_create.py
│   │   ├── database_query.py
│   │   ├── settings.py
│   │   ├── tts.py
│   │   └── vision.py
│   └── tabs_tools/
│       ├── __init__.py
│       ├── misc.py
│       ├── ocr.py
│       ├── scrape.py
│       ├── tools.py
│       ├── transcribe.py
│       └── vision.py
├── gui.py
├── modules/
│   ├── __init__.py
│   ├── kokoro.py
│   ├── ocr.py
│   ├── process_images.py
│   ├── scraper.py
│   ├── transcribe.py
│   ├── tts.py
│   └── voice_recorder.py
├── setup_windows.py
└── tools/
    ├── __init__.py
    ├── check_packages.py
    ├── chunk_userguide.py
    └── replace_sourcecode.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Virtual environment
Lib/
Scripts/
Include/
pyvenv.cfg

# Python
__pycache__/
*.pyc
*.pyo

# Models (large binary files)
Models/

# User data
Vector_DB/
Vector_DB_Backup/
Docs_for_DB/
Scraped_Documentation/

# Test files
TEST - FILES/

# Build artifacts
*.egg-info/

# Config (contains user API keys)
config.yaml

# Share (installed by tessdata pip packages)
share/

# Misc
ffmpeg.exe
push_to_github.ps1
metadata.txt
chat_history.txt
*.log

.claude/
.lock

# Tests (local only)
tests/


================================================
FILE: Assets/SentenceTransformer.py
================================================
# modified 4.1.0 to modify "_text_length" method and add debugging
from __future__ import annotations

import copy
import importlib
import json
import logging
import math
import os
import queue
import shutil
import sys
import logging, sys
import logging

import tempfile
import traceback
import warnings
from collections import OrderedDict
from collections.abc import Iterable, Iterator
from contextlib import contextmanager
from multiprocessing import Queue
from pathlib import Path
from typing import Any, Callable, Literal, overload

import numpy as np
import numpy.typing as npt
import torch
import torch.multiprocessing as mp
import transformers
from huggingface_hub import HfApi
from packaging import version
from torch import Tensor, device, nn
from tqdm.autonotebook import trange
from transformers import is_torch_npu_available
from transformers.dynamic_module_utils import get_class_from_dynamic_module, get_relative_import_files

from sentence_transformers.model_card import SentenceTransformerModelCardData, generate_model_card
from sentence_transformers.similarity_functions import SimilarityFunction

from . import __MODEL_HUB_ORGANIZATION__, __version__
from .evaluation import SentenceEvaluator
from .fit_mixin import FitMixin
from .models import Normalize, Pooling, Transformer
from .peft_mixin import PeftAdapterMixin
from .quantization import quantize_embeddings
from .util import (
    batch_to_device,
    get_device_name,
    import_from_string,
    is_sentence_transformer_model,
    load_dir_path,
    load_file_path,
    save_to_hub_args_decorator,
    truncate_embeddings,
)

logger = logging.getLogger(__name__)


class SentenceTransformer(nn.Sequential, FitMixin, PeftAdapterMixin):
    """
    Loads or creates a SentenceTransformer model that can be used to map sentences / text to embeddings.

    Args:
        model_name_or_path (str, optional): If it is a filepath on disc, it loads the model from that path. If it is not a path,
            it first tries to download a pre-trained SentenceTransformer model. If that fails, tries to construct a model
            from the Hugging Face Hub with that name.
        modules (Iterable[nn.Module], optional): A list of torch Modules that should be called sequentially, can be used to create custom
            SentenceTransformer models from scratch.
        device (str, optional): Device (like "cuda", "cpu", "mps", "npu") that should be used for computation. If None, checks if a GPU
            can be used.
        prompts (Dict[str, str], optional): A dictionary with prompts for the model. The key is the prompt name, the value is the prompt text.
            The prompt text will be prepended before any text to encode. For example:
            `{"query": "query: ", "passage": "passage: "}` or `{"clustering": "Identify the main category based on the
            titles in "}`.
        default_prompt_name (str, optional): The name of the prompt that should be used by default. If not set,
            no prompt will be applied.
        similarity_fn_name (str or SimilarityFunction, optional): The name of the similarity function to use. Valid options are "cosine", "dot",
            "euclidean", and "manhattan". If not set, it is automatically set to "cosine" if `similarity` or
            `similarity_pairwise` are called while `model.similarity_fn_name` is still `None`.
        cache_folder (str, optional): Path to store models. Can also be set by the SENTENCE_TRANSFORMERS_HOME environment variable.
        trust_remote_code (bool, optional): Whether or not to allow for custom models defined on the Hub in their own modeling files.
            This option should only be set to True for repositories you trust and in which you have read the code, as it
            will execute code present on the Hub on your local machine.
        revision (str, optional): The specific model version to use. It can be a branch name, a tag name, or a commit id,
            for a stored model on Hugging Face.
        local_files_only (bool, optional): Whether or not to only look at local files (i.e., do not try to download the model).
        token (bool or str, optional): Hugging Face authentication token to download private models.
        use_auth_token (bool or str, optional): Deprecated argument. Please use `token` instead.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` does no truncation. Truncation is
            only applicable during inference when :meth:`SentenceTransformer.encode` is called.
        model_kwargs (Dict[str, Any], optional): Additional model configuration parameters to be passed to the Hugging Face Transformers model.
            Particularly useful options are:

            - ``torch_dtype``: Override the default `torch.dtype` and load the model under a specific `dtype`.
              The different options are:

                    1. ``torch.float16``, ``torch.bfloat16`` or ``torch.float``: load in a specified
                    ``dtype``, ignoring the model's ``config.torch_dtype`` if one exists. If not specified - the model will
                    get loaded in ``torch.float`` (fp32).

                    2. ``"auto"`` - A ``torch_dtype`` entry in the ``config.json`` file of the model will be
                    attempted to be used. If this entry isn't found then next check the ``dtype`` of the first weight in
                    the checkpoint that's of a floating point type and use that as ``dtype``. This will load the model
                    using the ``dtype`` it was saved in at the end of the training. It can't be used as an indicator of how
                    the model was trained. Since it could be trained in one of half precision dtypes, but saved in fp32.
            - ``attn_implementation``: The attention implementation to use in the model (if relevant). Can be any of
              `"eager"` (manual implementation of the attention), `"sdpa"` (using `F.scaled_dot_product_attention
              <https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html>`_),
              or `"flash_attention_2"` (using `Dao-AILab/flash-attention <https://github.com/Dao-AILab/flash-attention>`_).
              By default, if available, SDPA will be used for torch>=2.1.1. The default is otherwise the manual `"eager"`
              implementation.
            - ``provider``: If backend is "onnx", this is the provider to use for inference, for example "CPUExecutionProvider",
              "CUDAExecutionProvider", etc. See https://onnxruntime.ai/docs/execution-providers/ for all ONNX execution providers.
            - ``file_name``: If backend is "onnx" or "openvino", this is the file name to load, useful for loading optimized
              or quantized ONNX or OpenVINO models.
            - ``export``: If backend is "onnx" or "openvino", then this is a boolean flag specifying whether this model should
              be exported to the backend. If not specified, the model will be exported only if the model repository or directory
              does not already contain an exported model.

            See the `PreTrainedModel.from_pretrained
            <https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained>`_
            documentation for more details.
        tokenizer_kwargs (Dict[str, Any], optional): Additional tokenizer configuration parameters to be passed to the Hugging Face Transformers tokenizer.
            See the `AutoTokenizer.from_pretrained
            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained>`_
            documentation for more details.
        config_kwargs (Dict[str, Any], optional): Additional model configuration parameters to be passed to the Hugging Face Transformers config.
            See the `AutoConfig.from_pretrained
            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoConfig.from_pretrained>`_
            documentation for more details.
        model_card_data (:class:`~sentence_transformers.model_card.SentenceTransformerModelCardData`, optional): A model
            card data object that contains information about the model. This is used to generate a model card when saving
            the model. If not set, a default model card data object is created.
        backend (str): The backend to use for inference. Can be one of "torch" (default), "onnx", or "openvino".
            See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for benchmarking information
            on the different backends.

    Example:
        ::

            from sentence_transformers import SentenceTransformer

            # Load a pre-trained SentenceTransformer model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Encode some texts
            sentences = [
                "The weather is lovely today.",
                "It's so sunny outside!",
                "He drove to the stadium.",
            ]
            embeddings = model.encode(sentences)
            print(embeddings.shape)
            # (3, 768)

            # Get the similarity scores between all sentences
            similarities = model.similarity(embeddings, embeddings)
            print(similarities)
            # tensor([[1.0000, 0.6817, 0.0492],
            #         [0.6817, 1.0000, 0.0421],
            #         [0.0492, 0.0421, 1.0000]])
    """

    def __init__(
        self,
        model_name_or_path: str | None = None,
        modules: Iterable[nn.Module] | None = None,
        device: str | None = None,
        prompts: dict[str, str] | None = None,
        default_prompt_name: str | None = None,
        similarity_fn_name: str | SimilarityFunction | None = None,
        cache_folder: str | None = None,
        trust_remote_code: bool = False,
        revision: str | None = None,
        local_files_only: bool = False,
        token: bool | str | None = None,
        use_auth_token: bool | str | None = None,
        truncate_dim: int | None = None,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
        model_card_data: SentenceTransformerModelCardData | None = None,
        backend: Literal["torch", "onnx", "openvino"] = "torch",
    ) -> None:
        # Note: self._load_sbert_model can also update `self.prompts` and `self.default_prompt_name`
        self.prompts = prompts or {}
        self.default_prompt_name = default_prompt_name
        self.similarity_fn_name = similarity_fn_name
        self.trust_remote_code = trust_remote_code
        self.truncate_dim = truncate_dim
        self.model_card_data = model_card_data or SentenceTransformerModelCardData()
        self.module_kwargs = None
        self._model_card_vars = {}
        self._model_card_text = None
        self._model_config = {}
        self.backend = backend
        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v4 of SentenceTransformers.",
                FutureWarning,
            )
            if token is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            token = use_auth_token

        if cache_folder is None:
            cache_folder = os.getenv("SENTENCE_TRANSFORMERS_HOME")

        if device is None:
            device = get_device_name()
            logger.info(f"Use pytorch device_name: {device}")

        if device == "hpu" and importlib.util.find_spec("optimum") is not None:
            from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

            adapt_transformers_to_gaudi()

        if model_name_or_path is not None and model_name_or_path != "":
            logger.info(f"Load pretrained SentenceTransformer: {model_name_or_path}")

            # Old models that don't belong to any organization
            basic_transformer_models = [
                "albert-base-v1",
                "albert-base-v2",
                "albert-large-v1",
                "albert-large-v2",
                "albert-xlarge-v1",
                "albert-xlarge-v2",
                "albert-xxlarge-v1",
                "albert-xxlarge-v2",
                "bert-base-cased-finetuned-mrpc",
                "bert-base-cased",
                "bert-base-chinese",
                "bert-base-german-cased",
                "bert-base-german-dbmdz-cased",
                "bert-base-german-dbmdz-uncased",
                "bert-base-multilingual-cased",
                "bert-base-multilingual-uncased",
                "bert-base-uncased",
                "bert-large-cased-whole-word-masking-finetuned-squad",
                "bert-large-cased-whole-word-masking",
                "bert-large-cased",
                "bert-large-uncased-whole-word-masking-finetuned-squad",
                "bert-large-uncased-whole-word-masking",
                "bert-large-uncased",
                "camembert-base",
                "ctrl",
                "distilbert-base-cased-distilled-squad",
                "distilbert-base-cased",
                "distilbert-base-german-cased",
                "distilbert-base-multilingual-cased",
                "distilbert-base-uncased-distilled-squad",
                "distilbert-base-uncased-finetuned-sst-2-english",
                "distilbert-base-uncased",
                "distilgpt2",
                "distilroberta-base",
                "gpt2-large",
                "gpt2-medium",
                "gpt2-xl",
                "gpt2",
                "openai-gpt",
                "roberta-base-openai-detector",
                "roberta-base",
                "roberta-large-mnli",
                "roberta-large-openai-detector",
                "roberta-large",
                "t5-11b",
                "t5-3b",
                "t5-base",
                "t5-large",
                "t5-small",
                "transfo-xl-wt103",
                "xlm-clm-ende-1024",
                "xlm-clm-enfr-1024",
                "xlm-mlm-100-1280",
                "xlm-mlm-17-1280",
                "xlm-mlm-en-2048",
                "xlm-mlm-ende-1024",
                "xlm-mlm-enfr-1024",
                "xlm-mlm-enro-1024",
                "xlm-mlm-tlm-xnli15-1024",
                "xlm-mlm-xnli15-1024",
                "xlm-roberta-base",
                "xlm-roberta-large-finetuned-conll02-dutch",
                "xlm-roberta-large-finetuned-conll02-spanish",
                "xlm-roberta-large-finetuned-conll03-english",
                "xlm-roberta-large-finetuned-conll03-german",
                "xlm-roberta-large",
                "xlnet-base-cased",
                "xlnet-large-cased",
            ]

            if not os.path.exists(model_name_or_path):
                # Not a path, load from hub
                if "\\" in model_name_or_path or model_name_or_path.count("/") > 1:
                    raise FileNotFoundError(f"Path {model_name_or_path} not found")

                if "/" not in model_name_or_path and model_name_or_path.lower() not in basic_transformer_models:
                    # A model from sentence-transformers
                    model_name_or_path = __MODEL_HUB_ORGANIZATION__ + "/" + model_name_or_path

            if is_sentence_transformer_model(
                model_name_or_path,
                token,
                cache_folder=cache_folder,
                revision=revision,
                local_files_only=local_files_only,
            ):
                modules, self.module_kwargs = self._load_sbert_model(
                    model_name_or_path,
                    token=token,
                    cache_folder=cache_folder,
                    revision=revision,
                    trust_remote_code=trust_remote_code,
                    local_files_only=local_files_only,
                    model_kwargs=model_kwargs,
                    tokenizer_kwargs=tokenizer_kwargs,
                    config_kwargs=config_kwargs,
                )
            else:
                modules = self._load_auto_model(
                    model_name_or_path,
                    token=token,
                    cache_folder=cache_folder,
                    revision=revision,
                    trust_remote_code=trust_remote_code,
                    local_files_only=local_files_only,
                    model_kwargs=model_kwargs,
                    tokenizer_kwargs=tokenizer_kwargs,
                    config_kwargs=config_kwargs,
                )

        if modules is not None and not isinstance(modules, OrderedDict):
            modules = OrderedDict([(str(idx), module) for idx, module in enumerate(modules)])

        super().__init__(modules)

        # Ensure all tensors in the model are of the same dtype as the first tensor
        # This is necessary if the first module has been given a lower precision via
        # model_kwargs["torch_dtype"]. The rest of the model should be loaded in the same dtype
        # See #2887 for more details
        try:
            dtype = next(self.parameters()).dtype
            self.to(dtype)
        except StopIteration:
            pass

        self.to(device)
        self.is_hpu_graph_enabled = False

        if self.default_prompt_name is not None and self.default_prompt_name not in self.prompts:
            raise ValueError(
                f"Default prompt name '{self.default_prompt_name}' not found in the configured prompts "
                f"dictionary with keys {list(self.prompts.keys())!r}."
            )

        if self.prompts:
            logger.info(f"{len(self.prompts)} prompts are loaded, with the keys: {list(self.prompts.keys())}")
        if self.default_prompt_name:
            logger.warning(
                f"Default prompt name is set to '{self.default_prompt_name}'. "
                "This prompt will be applied to all `encode()` calls, except if `encode()` "
                "is called with `prompt` or `prompt_name` parameters."
            )

        # Ideally, INSTRUCTOR models should set `include_prompt=False` in their pooling configuration, but
        # that would be a breaking change for users currently using the InstructorEmbedding project.
        # So, instead we hardcode setting it for the main INSTRUCTOR models, and otherwise give a warning if we
        # suspect the user is using an INSTRUCTOR model.
        if model_name_or_path in ("hkunlp/instructor-base", "hkunlp/instructor-large", "hkunlp/instructor-xl"):
            self.set_pooling_include_prompt(include_prompt=False)
        elif (
            model_name_or_path
            and "/" in model_name_or_path
            and "instructor" in model_name_or_path.split("/")[1].lower()
        ):
            if any([module.include_prompt for module in self if isinstance(module, Pooling)]):
                logger.warning(
                    "Instructor models require `include_prompt=False` in the pooling configuration. "
                    "Either update the model configuration or call `model.set_pooling_include_prompt(False)` after loading the model."
                )

        # Pass the model to the model card data for later use in generating a model card upon saving this model
        self.model_card_data.register_model(self)

    def get_backend(self) -> Literal["torch", "onnx", "openvino"]:
        """Return the backend used for inference, which can be one of "torch", "onnx", or "openvino".

        Returns:
            str: The backend used for inference.
        """
        return self.backend

    # Return a single tensor because we're passing a single sentence.
    @overload
    def encode(
        self,
        sentences: str,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: Literal["sentence_embedding", "token_embeddings"] = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: Literal[False] = ...,
        convert_to_tensor: bool = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> Tensor: ...

    # Return a single array, because convert_to_numpy is True
    # and "sentence_embeddings" is passed
    @overload
    def encode(
        self,
        sentences: str | list[str] | np.ndarray,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: Literal["sentence_embedding"] = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: Literal[True] = ...,
        convert_to_tensor: Literal[False] = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> np.ndarray: ...

    # Return a single tensor, because convert_to_tensor is True
    # and "sentence_embeddings" is passed
    @overload
    def encode(
        self,
        sentences: str | list[str] | np.ndarray,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: Literal["sentence_embedding"] = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: bool = ...,
        convert_to_tensor: Literal[True] = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> Tensor: ...

    # Return a list of tensors. Value of convert_ doesn't matter.
    @overload
    def encode(
        self,
        sentences: list[str] | np.ndarray,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: Literal["sentence_embedding", "token_embeddings"] = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: bool = ...,
        convert_to_tensor: bool = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> list[Tensor]: ...

    # Return a list of dict of features, ignore the conversion args.
    @overload
    def encode(
        self,
        sentences: list[str] | np.ndarray,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: None = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: bool = ...,
        convert_to_tensor: bool = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> list[dict[str, Tensor]]: ...

    # Return a dict of features, ignore the conversion args.
    @overload
    def encode(
        self,
        sentences: str,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: None = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: bool = ...,
        convert_to_tensor: bool = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> dict[str, Tensor]: ...

    # If "token_embeddings" is True, then the output is a single tensor.
    @overload
    def encode(
        self,
        sentences: str,
        prompt_name: str | None = ...,
        prompt: str | None = ...,
        batch_size: int = ...,
        show_progress_bar: bool | None = ...,
        output_value: Literal["token_embeddings"] = ...,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
        convert_to_numpy: bool = ...,
        convert_to_tensor: bool = ...,
        device: str | None = ...,
        normalize_embeddings: bool = ...,
        **kwargs,
    ) -> Tensor: ...

    def encode(
        self,
        sentences: str | list[str] | np.ndarray,
        prompt_name: str | None = None,
        prompt: str | None = None,
        batch_size: int = 32,
        show_progress_bar: bool | None = None,
        output_value: Literal["sentence_embedding", "token_embeddings"] | None = "sentence_embedding",
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
        convert_to_numpy: bool = True,
        convert_to_tensor: bool = False,
        device: str | None = None,
        normalize_embeddings: bool = False,
        **kwargs,
    ) -> list[Tensor] | np.ndarray | Tensor | dict[str, Tensor] | list[dict[str, Tensor]]:
        """
        Computes sentence embeddings.

        Args:
            sentences (Union[str, List[str]]): The sentences to embed.
            prompt_name (Optional[str], optional): The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary,
                which is either set in the constructor or loaded from the model configuration. For example if
                ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
                is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
                is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
            prompt (Optional[str], optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
                sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
                because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored. Defaults to None.
            batch_size (int, optional): The batch size used for the computation. Defaults to 32.
            show_progress_bar (bool, optional): Whether to output a progress bar when encode sentences. Defaults to None.
            output_value (Optional[Literal["sentence_embedding", "token_embeddings"]], optional): The type of embeddings to return:
                "sentence_embedding" to get sentence embeddings, "token_embeddings" to get wordpiece token embeddings, and `None`,
                to get all output values. Defaults to "sentence_embedding".
            precision (Literal["float32", "int8", "uint8", "binary", "ubinary"], optional): The precision to use for the embeddings.
                Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions are quantized embeddings.
                Quantized embeddings are smaller in size and faster to compute, but may have a lower accuracy. They are useful for
                reducing the size of the embeddings of a corpus for semantic search, among other tasks. Defaults to "float32".
            convert_to_numpy (bool, optional): Whether the output should be a list of numpy vectors. If False, it is a list of PyTorch tensors.
                Defaults to True.
            convert_to_tensor (bool, optional): Whether the output should be one large tensor. Overwrites `convert_to_numpy`.
                Defaults to False.
            device (str, optional): Which :class:`torch.device` to use for the computation. Defaults to None.
            normalize_embeddings (bool, optional): Whether to normalize returned vectors to have length 1. In that case,
                the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.

        Returns:
            Union[List[Tensor], ndarray, Tensor]: By default, a 2d numpy array with shape [num_inputs, output_dimension] is returned.
            If only one string input is provided, then the output is a 1d array with shape [output_dimension]. If ``convert_to_tensor``,
            a torch Tensor is returned instead. If ``self.truncate_dim <= output_dimension`` then output_dimension is ``self.truncate_dim``.

        Example:
            ::

                from sentence_transformers import SentenceTransformer

                # Load a pre-trained SentenceTransformer model
                model = SentenceTransformer('all-mpnet-base-v2')

                # Encode some texts
                sentences = [
                    "The weather is lovely today.",
                    "It's so sunny outside!",
                    "He drove to the stadium.",
                ]
                embeddings = model.encode(sentences)
                print(embeddings.shape)
                # (3, 768)
        """
        if self.device.type == "hpu" and not self.is_hpu_graph_enabled:
            import habana_frameworks.torch as ht

            if hasattr(ht, "hpu") and hasattr(ht.hpu, "wrap_in_hpu_graph"):
                ht.hpu.wrap_in_hpu_graph(self, disable_tensor_cache=True)
                self.is_hpu_graph_enabled = True

        self.eval()
        if show_progress_bar is None:
            show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG)

        if convert_to_tensor:
            convert_to_numpy = False

        if output_value != "sentence_embedding":
            convert_to_tensor = False
            convert_to_numpy = False

        input_was_string = False
        if isinstance(sentences, str) or not hasattr(
            sentences, "__len__"
        ):  # Cast an individual sentence to a list with length 1
            sentences = [sentences]
            input_was_string = True

        if prompt is None:
            if prompt_name is not None:
                try:
                    prompt = self.prompts[prompt_name]
                except KeyError:
                    raise ValueError(
                        f"Prompt name '{prompt_name}' not found in the configured prompts dictionary with keys {list(self.prompts.keys())!r}."
                    )
            elif self.default_prompt_name is not None:
                prompt = self.prompts.get(self.default_prompt_name, None)
        else:
            if prompt_name is not None:
                logger.warning(
                    "Encode with either a `prompt`, a `prompt_name`, or neither, but not both. "
                    "Ignoring the `prompt_name` in favor of `prompt`."
                )

        extra_features = {}
        if prompt is not None:
            sentences = [prompt + sentence for sentence in sentences]

            # Some models (e.g. INSTRUCTOR, GRIT) require removing the prompt before pooling
            # Tracking the prompt length allow us to remove the prompt during pooling
            tokenized_prompt = self.tokenize([prompt])
            if "input_ids" in tokenized_prompt:
                extra_features["prompt_length"] = tokenized_prompt["input_ids"].shape[-1] - 1

        if device is None:
            device = self.device

        self.to(device)

        all_embeddings = []
        length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences])
        sentences_sorted = [sentences[idx] for idx in length_sorted_idx]

        for start_index in trange(0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar):
            sentences_batch = sentences_sorted[start_index : start_index + batch_size]

            # #==DEBUG================================================================================================
            # print(f"\n=== DEBUG: Before tokenization ===")
            # print(f"Batch size: {len(sentences_batch)}")
            # print(f"Sentences in batch:")
            # for i, sent in enumerate(sentences_batch):
                # print(f"  [{i}] Type: {type(sent)}, Length: {len(sent) if hasattr(sent, '__len__') else 'no len'}")
                # print(f"      Content: {repr(sent)}...")
            # #==DEBUG================================================================================================

            features = self.tokenize(sentences_batch)

            # #==DEBUG================================================================================================
            # print(f"\n=== DEBUG: After tokenization (features dict) ===")
            # print(f"Features keys: {list(features.keys())}")
            # for key, value in features.items():
                # print(f"  {key}:")
                # print(f"    Type: {type(value)}")
                # if hasattr(value, 'shape'):
                    # print(f"    Shape: {value.shape}")
                # elif hasattr(value, '__len__'):
                    # print(f"    Length: {len(value)}")
                    # if isinstance(value, (list, tuple)) and len(value) > 0:
                        # print(f"    First element type: {type(value[0])}")
                        # if hasattr(value[0], '__len__'):
                            # print(f"    First element length: {len(value[0])}")
                        # print(f"    Sample content: {value}")  # First 2 elements
                # print(f"    Content preview: {str(value)}...")
            # #==DEBUG================================================================================================

            # print(
                # f"SentenceTransformer.py - DEBUG: batch {start_index // batch_size} padded_side={self.tokenizer.padding_side if hasattr(self, 'tokenizer') else 'n/a'} "
                # f"max_len={self.tokenizer.model_max_length if hasattr(self, 'tokenizer') else 'n/a'} "
                # f"seq_lens={[len(ids) for ids in features['input_ids'].tolist()] if 'input_ids' in features else 'n/a'}"
            # )

            if self.device.type == "hpu":
                if "input_ids" in features:
                    curr_tokenize_len = features["input_ids"].shape
                    additional_pad_len = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) - curr_tokenize_len[1]
                    features["input_ids"] = torch.cat(
                        (
                            features["input_ids"],
                            torch.ones((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
                        ),
                        -1,
                    )
                    features["attention_mask"] = torch.cat(
                        (
                            features["attention_mask"],
                            torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
                        ),
                        -1,
                    )
                    if "token_type_ids" in features:
                        features["token_type_ids"] = torch.cat(
                            (
                                features["token_type_ids"],
                                torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8),
                            ),
                            -1,
                        )

            features = batch_to_device(features, device)
            features.update(extra_features)

            with torch.no_grad():
                out_features = self.forward(features, **kwargs)
                if self.device.type == "hpu":
                    out_features = copy.deepcopy(out_features)

                out_features["sentence_embedding"] = truncate_embeddings(
                    out_features["sentence_embedding"], self.truncate_dim
                )

                if output_value == "token_embeddings":
                    embeddings = []
                    for token_emb, attention in zip(out_features[output_value], out_features["attention_mask"]):
                        last_mask_id = len(attention) - 1
                        while last_mask_id > 0 and attention[last_mask_id].item() == 0:
                            last_mask_id -= 1

                        embeddings.append(token_emb[0 : last_mask_id + 1])
                elif output_value is None:  # Return all outputs
                    embeddings = []
                    for idx in range(len(out_features["sentence_embedding"])):
                        batch_item = {}
                        for name, value in out_features.items():
                            try:
                                batch_item[name] = value[idx]
                            except TypeError:
                                # Handle non-indexable values (like prompt_length)
                                batch_item[name] = value
                        embeddings.append(batch_item)
                else:  # Sentence embeddings
                    embeddings = out_features[output_value]
                    embeddings = embeddings.detach()
                    if normalize_embeddings:
                        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

                    # fixes for #522 and #487 to avoid oom problems on gpu with large datasets
                    if convert_to_numpy:
                        embeddings = embeddings.cpu()

                all_embeddings.extend(embeddings)

        all_embeddings = [all_embeddings[idx] for idx in np.argsort(length_sorted_idx)]

        if precision and precision != "float32":
            all_embeddings = quantize_embeddings(all_embeddings, precision=precision)

        if convert_to_tensor:
            if len(all_embeddings):
                if isinstance(all_embeddings, np.ndarray):
                    all_embeddings = torch.from_numpy(all_embeddings)
                else:
                    all_embeddings = torch.stack(all_embeddings)
            else:
                all_embeddings = torch.Tensor()
        elif convert_to_numpy:
            if not isinstance(all_embeddings, np.ndarray):
                if all_embeddings and all_embeddings[0].dtype == torch.bfloat16:
                    all_embeddings = np.asarray([emb.float().numpy() for emb in all_embeddings])
                else:
                    all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings])
        elif isinstance(all_embeddings, np.ndarray):
            all_embeddings = [torch.from_numpy(embedding) for embedding in all_embeddings]

        if input_was_string:
            all_embeddings = all_embeddings[0]

        return all_embeddings

    def forward(self, input: dict[str, Tensor], **kwargs) -> dict[str, Tensor]:
        if self.module_kwargs is None:
            return super().forward(input)

        for module_name, module in self.named_children():
            module_kwarg_keys = self.module_kwargs.get(module_name, [])
            module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
            input = module(input, **module_kwargs)
        return input

    @property
    def similarity_fn_name(self) -> Literal["cosine", "dot", "euclidean", "manhattan"]:
        """Return the name of the similarity function used by :meth:`SentenceTransformer.similarity` and :meth:`SentenceTransformer.similarity_pairwise`.

        Returns:
            Optional[str]: The name of the similarity function. Can be None if not set, in which case it will
                default to "cosine" when first called.

        Example:
            >>> model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
            >>> model.similarity_fn_name
            'dot'
        """
        if self._similarity_fn_name is None:
            self.similarity_fn_name = SimilarityFunction.COSINE
        return self._similarity_fn_name

    @similarity_fn_name.setter
    def similarity_fn_name(
        self, value: Literal["cosine", "dot", "euclidean", "manhattan"] | SimilarityFunction
    ) -> None:
        if isinstance(value, SimilarityFunction):
            value = value.value
        self._similarity_fn_name = value

        if value is not None:
            self._similarity = SimilarityFunction.to_similarity_fn(value)
            self._similarity_pairwise = SimilarityFunction.to_similarity_pairwise_fn(value)

    @overload
    def similarity(self, embeddings1: Tensor, embeddings2: Tensor) -> Tensor: ...

    @overload
    def similarity(self, embeddings1: npt.NDArray[np.float32], embeddings2: npt.NDArray[np.float32]) -> Tensor: ...

    @property
    def similarity(self) -> Callable[[Tensor | npt.NDArray[np.float32], Tensor | npt.NDArray[np.float32]], Tensor]:
        """
        Compute the similarity between two collections of embeddings. The output will be a matrix with the similarity
        scores between all embeddings from the first parameter and all embeddings from the second parameter. This
        differs from `similarity_pairwise` which computes the similarity between each pair of embeddings.
        This method supports only embeddings with fp32 precision and does not accommodate quantized embeddings.

        Args:
            embeddings1 (Union[Tensor, ndarray]): [num_embeddings_1, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
            embeddings2 (Union[Tensor, ndarray]): [num_embeddings_2, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.

        Returns:
            Tensor: A [num_embeddings_1, num_embeddings_2]-shaped torch tensor with similarity scores.

        Example:
            ::

                >>> model = SentenceTransformer("all-mpnet-base-v2")
                >>> sentences = [
                ...     "The weather is so nice!",
                ...     "It's so sunny outside.",
                ...     "He's driving to the movie theater.",
                ...     "She's going to the cinema.",
                ... ]
                >>> embeddings = model.encode(sentences, normalize_embeddings=True)
                >>> model.similarity(embeddings, embeddings)
                tensor([[1.0000, 0.7235, 0.0290, 0.1309],
                        [0.7235, 1.0000, 0.0613, 0.1129],
                        [0.0290, 0.0613, 1.0000, 0.5027],
                        [0.1309, 0.1129, 0.5027, 1.0000]])
                >>> model.similarity_fn_name
                "cosine"
                >>> model.similarity_fn_name = "euclidean"
                >>> model.similarity(embeddings, embeddings)
                tensor([[-0.0000, -0.7437, -1.3935, -1.3184],
                        [-0.7437, -0.0000, -1.3702, -1.3320],
                        [-1.3935, -1.3702, -0.0000, -0.9973],
                        [-1.3184, -1.3320, -0.9973, -0.0000]])
        """
        if self.similarity_fn_name is None:
            self.similarity_fn_name = SimilarityFunction.COSINE
        return self._similarity

    @overload
    def similarity_pairwise(self, embeddings1: Tensor, embeddings2: Tensor) -> Tensor: ...

    @overload
    def similarity_pairwise(
        self, embeddings1: npt.NDArray[np.float32], embeddings2: npt.NDArray[np.float32]
    ) -> Tensor: ...

    @property
    def similarity_pairwise(
        self,
    ) -> Callable[[Tensor | npt.NDArray[np.float32], Tensor | npt.NDArray[np.float32]], Tensor]:
        """
        Compute the similarity between two collections of embeddings. The output will be a vector with the similarity
        scores between each pair of embeddings.
        This method supports only embeddings with fp32 precision and does not accommodate quantized embeddings.

        Args:
            embeddings1 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
            embeddings2 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.

        Returns:
            Tensor: A [num_embeddings]-shaped torch tensor with pairwise similarity scores.

        Example:
            ::

                >>> model = SentenceTransformer("all-mpnet-base-v2")
                >>> sentences = [
                ...     "The weather is so nice!",
                ...     "It's so sunny outside.",
                ...     "He's driving to the movie theater.",
                ...     "She's going to the cinema.",
                ... ]
                >>> embeddings = model.encode(sentences, normalize_embeddings=True)
                >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
                tensor([0.7235, 0.5027])
                >>> model.similarity_fn_name
                "cosine"
                >>> model.similarity_fn_name = "euclidean"
                >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
                tensor([-0.7437, -0.9973])
        """
        if self.similarity_fn_name is None:
            self.similarity_fn_name = SimilarityFunction.COSINE
        return self._similarity_pairwise

    def start_multi_process_pool(
        self, target_devices: list[str] = None
    ) -> dict[Literal["input", "output", "processes"], Any]:
        """
        Starts a multi-process pool to process the encoding with several independent processes
        via :meth:`SentenceTransformer.encode_multi_process <sentence_transformers.SentenceTransformer.encode_multi_process>`.

        This method is recommended if you want to encode on multiple GPUs or CPUs. It is advised
        to start only one process per GPU. This method works together with encode_multi_process
        and stop_multi_process_pool.

        Args:
            target_devices (List[str], optional): PyTorch target devices, e.g. ["cuda:0", "cuda:1", ...],
                ["npu:0", "npu:1", ...], or ["cpu", "cpu", "cpu", "cpu"]. If target_devices is None and CUDA/NPU
                is available, then all available CUDA/NPU devices will be used. If target_devices is None and
                CUDA/NPU is not available, then 4 CPU devices will be used.

        Returns:
            Dict[str, Any]: A dictionary with the target processes, an input queue, and an output queue.
        """
        if target_devices is None:
            if torch.cuda.is_available():
                target_devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
            elif is_torch_npu_available():
                target_devices = [f"npu:{i}" for i in range(torch.npu.device_count())]
            else:
                logger.info("CUDA/NPU is not available. Starting 4 CPU workers")
                target_devices = ["cpu"] * 4

        logger.info("Start multi-process pool on devices: {}".format(", ".join(map(str, target_devices))))

        self.to("cpu")
        self.share_memory()
        ctx = mp.get_context("spawn")
        input_queue = ctx.Queue()
        output_queue = ctx.Queue()
        processes = []

        for device_id in target_devices:
            p = ctx.Process(
                target=SentenceTransformer._encode_multi_process_worker,
                args=(device_id, self, input_queue, output_queue),
                daemon=True,
            )
            p.start()
            processes.append(p)

        return {"input": input_queue, "output": output_queue, "processes": processes}

    @staticmethod
    def stop_multi_process_pool(pool: dict[Literal["input", "output", "processes"], Any]) -> None:
        """
        Stops all processes started with start_multi_process_pool.

        Args:
            pool (Dict[str, object]): A dictionary containing the input queue, output queue, and process list.

        Returns:
            None
        """
        for p in pool["processes"]:
            p.terminate()

        for p in pool["processes"]:
            p.join()
            p.close()

        pool["input"].close()
        pool["output"].close()

    def encode_multi_process(
        self,
        sentences: list[str],
        pool: dict[Literal["input", "output", "processes"], Any],
        prompt_name: str | None = None,
        prompt: str | None = None,
        batch_size: int = 32,
        chunk_size: int = None,
        show_progress_bar: bool | None = None,
        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
        normalize_embeddings: bool = False,
    ) -> np.ndarray:
        """
        Encodes a list of sentences using multiple processes and GPUs via
        :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>`.
        The sentences are chunked into smaller packages and sent to individual processes, which encode them on different
        GPUs or CPUs. This method is only suitable for encoding large sets of sentences.

        Args:
            sentences (List[str]): List of sentences to encode.
            pool (Dict[Literal["input", "output", "processes"], Any]): A pool of workers started with
                :meth:`SentenceTransformer.start_multi_process_pool <sentence_transformers.SentenceTransformer.start_multi_process_pool>`.
            prompt_name (Optional[str], optional): The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary,
                which is either set in the constructor or loaded from the model configuration. For example if
                ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
                is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
                is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
            prompt (Optional[str], optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
                sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
                because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored. Defaults to None.
            batch_size (int): Encode sentences with batch size. (default: 32)
            chunk_size (int): Sentences are chunked and sent to the individual processes. If None, it determines a
                sensible size. Defaults to None.
            show_progress_bar (bool, optional): Whether to output a progress bar when encode sentences. Defaults to None.
            precision (Literal["float32", "int8", "uint8", "binary", "ubinary"]): The precision to use for the
                embeddings. Can be "float32", "int8", "uint8", "binary", or "ubinary". All non-float32 precisions
                are quantized embeddings. Quantized embeddings are smaller in size and faster to compute, but may
                have lower accuracy. They are useful for reducing the size of the embeddings of a corpus for
                semantic search, among other tasks. Defaults to "float32".
            normalize_embeddings (bool): Whether to normalize returned vectors to have length 1. In that case,
                the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False.

        Returns:
            np.ndarray: A 2D numpy array with shape [num_inputs, output_dimension].

        Example:
            ::

                from sentence_transformers import SentenceTransformer

                def main():
                    model = SentenceTransformer("all-mpnet-base-v2")
                    sentences = ["The weather is so nice!", "It's so sunny outside.", "He's driving to the movie theater.", "She's going to the cinema."] * 1000

                    pool = model.start_multi_process_pool()
                    embeddings = model.encode_multi_process(sentences, pool)
                    model.stop_multi_process_pool(pool)

                    print(embeddings.shape)
                    # => (4000, 768)

                if __name__ == "__main__":
                    main()
        """

        if chunk_size is None:
            chunk_size = min(math.ceil(len(sentences) / len(pool["processes"]) / 10), 5000)

        if show_progress_bar is None:
            show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG)

        # print(f"Chunk data into {math.ceil(len(sentences) / chunk_size)} packages of size {chunk_size}")

        input_queue = pool["input"]
        last_chunk_id = 0
        chunk = []

        for sentence in sentences:
            chunk.append(sentence)
            if len(chunk) >= chunk_size:
                input_queue.put(
                    [last_chunk_id, batch_size, chunk, prompt_name, prompt, precision, normalize_embeddings]
                )
                last_chunk_id += 1
                chunk = []

        if len(chunk) > 0:
            input_queue.put([last_chunk_id, batch_size, chunk, prompt_name, prompt, precision, normalize_embeddings])
            last_chunk_id += 1

        output_queue = pool["output"]
        results_list = sorted(
            [output_queue.get() for _ in trange(last_chunk_id, desc="Chunks", disable=not show_progress_bar)],
            key=lambda x: x[0],
        )
        embeddings = np.concatenate([result[1] for result in results_list])
        return embeddings

    @staticmethod
    def _encode_multi_process_worker(
        target_device: str, model: SentenceTransformer, input_queue: Queue, results_queue: Queue
    ) -> None:
        """
        Internal working process to encode sentences in multi-process setup
        """
        while True:
            try:
                chunk_id, batch_size, sentences, prompt_name, prompt, precision, normalize_embeddings = (
                    input_queue.get()
                )
                embeddings = model.encode(
                    sentences,
                    prompt_name=prompt_name,
                    prompt=prompt,
                    device=target_device,
                    show_progress_bar=False,
                    precision=precision,
                    convert_to_numpy=True,
                    batch_size=batch_size,
                    normalize_embeddings=normalize_embeddings,
                )

                results_queue.put([chunk_id, embeddings])
            except queue.Empty:
                break

    def set_pooling_include_prompt(self, include_prompt: bool) -> None:
        """
        Sets the `include_prompt` attribute in the pooling layer in the model, if there is one.

        This is useful for INSTRUCTOR models, as the prompt should be excluded from the pooling strategy
        for these models.

        Args:
            include_prompt (bool): Whether to include the prompt in the pooling layer.

        Returns:
            None
        """
        for module in self:
            if isinstance(module, Pooling):
                module.include_prompt = include_prompt
                break

    def get_max_seq_length(self) -> int | None:
        """
        Returns the maximal sequence length that the model accepts. Longer inputs will be truncated.

        Returns:
            Optional[int]: The maximal sequence length that the model accepts, or None if it is not defined.
        """
        if hasattr(self._first_module(), "max_seq_length"):
            return self._first_module().max_seq_length

        return None

    def tokenize(self, texts: list[str] | list[dict] | list[tuple[str, str]]) -> dict[str, Tensor]:
        """
        Tokenizes the texts.

        Args:
            texts (Union[List[str], List[Dict], List[Tuple[str, str]]]): A list of texts to be tokenized.

        Returns:
            Dict[str, Tensor]: A dictionary of tensors with the tokenized texts. Common keys are "input_ids",
                "attention_mask", and "token_type_ids".
        """
        # print(f"SentenceTransformer.py - DEBUG: tokenize(): got {len(texts)} texts")
        return self._first_module().tokenize(texts)

    def get_sentence_features(self, *features) -> dict[Literal["sentence_embedding"], Tensor]:
        return self._first_module().get_sentence_features(*features)

    def get_sentence_embedding_dimension(self) -> int | None:
        """
        Returns the number of dimensions in the output of :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>`.

        Returns:
            Optional[int]: The number of dimensions in the output of `encode`. If it's not known, it's `None`.
        """
        output_dim = None
        for mod in reversed(self._modules.values()):
            sent_embedding_dim_method = getattr(mod, "get_sentence_embedding_dimension", None)
            if callable(sent_embedding_dim_method):
                output_dim = sent_embedding_dim_method()
                break
        if self.truncate_dim is not None:
            # The user requested truncation. If they set it to a dim greater than output_dim,
            # no truncation will actually happen. So return output_dim instead of self.truncate_dim
            return min(output_dim or np.inf, self.truncate_dim)
        return output_dim

    @contextmanager
    def truncate_sentence_embeddings(self, truncate_dim: int | None) -> Iterator[None]:
        """
        In this context, :meth:`SentenceTransformer.encode <sentence_transformers.SentenceTransformer.encode>` outputs
        sentence embeddings truncated at dimension ``truncate_dim``.

        This may be useful when you are using the same model for different applications where different dimensions
        are needed.

        Args:
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to. ``None`` does no truncation.

        Example:
            ::

                from sentence_transformers import SentenceTransformer

                model = SentenceTransformer("all-mpnet-base-v2")

                with model.truncate_sentence_embeddings(truncate_dim=16):
                    embeddings_truncated = model.encode(["hello there", "hiya"])
                assert embeddings_truncated.shape[-1] == 16
        """
        original_output_dim = self.truncate_dim
        try:
            self.truncate_dim = truncate_dim
            yield
        finally:
            self.truncate_dim = original_output_dim

    def _first_module(self) -> torch.nn.Module:
        """Returns the first module of this sequential embedder"""
        return self._modules[next(iter(self._modules))]

    def _last_module(self) -> torch.nn.Module:
        """Returns the last module of this sequential embedder"""
        return self._modules[next(reversed(self._modules))]

    def save(
        self,
        path: str,
        model_name: str | None = None,
        create_model_card: bool = True,
        train_datasets: list[str] | None = None,
        safe_serialization: bool = True,
    ) -> None:
        """
        Saves a model and its configuration files to a directory, so that it can be loaded
        with ``SentenceTransformer(path)`` again.

        Args:
            path (str): Path on disc where the model will be saved.
            model_name (str, optional): Optional model name.
            create_model_card (bool, optional): If True, create a README.md with basic information about this model.
            train_datasets (List[str], optional): Optional list with the names of the datasets used to train the model.
            safe_serialization (bool, optional): If True, save the model using safetensors. If False, save the model
                the traditional (but unsafe) PyTorch way.
        """
        if path is None:
            return

        os.makedirs(path, exist_ok=True)

        logger.info(f"Save model to {path}")
        modules_config = []

        # Save some model info
        self._model_config["__version__"] = {
            "sentence_transformers": __version__,
            "transformers": transformers.__version__,
            "pytorch": torch.__version__,
        }

        with open(os.path.join(path, "config_sentence_transformers.json"), "w") as fOut:
            config = self._model_config.copy()
            config["prompts"] = self.prompts
            config["default_prompt_name"] = self.default_prompt_name
            config["similarity_fn_name"] = self.similarity_fn_name
            json.dump(config, fOut, indent=2)

        # Save modules
        for idx, name in enumerate(self._modules):
            module = self._modules[name]
            if idx == 0 and hasattr(module, "save_in_root"):  # Save first module in the main folder
                model_path = path + "/"
            else:
                model_path = os.path.join(path, str(idx) + "_" + type(module).__name__)

            os.makedirs(model_path, exist_ok=True)
            # Try to save with safetensors, but fall back to the traditional PyTorch way if the module doesn't support it
            try:
                module.save(model_path, safe_serialization=safe_serialization)
            except TypeError:
                module.save(model_path)

            # "module" only works for Sentence Transformers as the modules have the same names as the classes
            class_ref = type(module).__module__
            # For remote modules, we want to remove "transformers_modules.{repo_name}":
            if class_ref.startswith("transformers_modules."):
                class_file = sys.modules[class_ref].__file__

                # Save the custom module file
                dest_file = Path(model_path) / (Path(class_file).name)
                shutil.copy(class_file, dest_file)

                # Save all files importeed in the custom module file
                for needed_file in get_relative_import_files(class_file):
                    dest_file = Path(model_path) / (Path(needed_file).name)
                    shutil.copy(needed_file, dest_file)

                # For remote modules, we want to ignore the "transformers_modules.{repo_id}" part,
                # i.e. we only want the filename
                class_ref = f"{class_ref.split('.')[-1]}.{type(module).__name__}"
            # For other cases, we want to add the class name:
            elif not class_ref.startswith("sentence_transformers."):
                class_ref = f"{class_ref}.{type(module).__name__}"

            module_config = {"idx": idx, "name": name, "path": os.path.basename(model_path), "type": class_ref}
            if self.module_kwargs and name in self.module_kwargs and (module_kwargs := self.module_kwargs[name]):
                module_config["kwargs"] = module_kwargs
            modules_config.append(module_config)

        with open(os.path.join(path, "modules.json"), "w") as fOut:
            json.dump(modules_config, fOut, indent=2)

        # Create model card
        if create_model_card:
            self._create_model_card(path, model_name, train_datasets)

    def save_pretrained(
        self,
        path: str,
        model_name: str | None = None,
        create_model_card: bool = True,
        train_datasets: list[str] | None = None,
        safe_serialization: bool = True,
    ) -> None:
        """
        Saves a model and its configuration files to a directory, so that it can be loaded
        with ``SentenceTransformer(path)`` again.

        Args:
            path (str): Path on disc where the model will be saved.
            model_name (str, optional): Optional model name.
            create_model_card (bool, optional): If True, create a README.md with basic information about this model.
            train_datasets (List[str], optional): Optional list with the names of the datasets used to train the model.
            safe_serialization (bool, optional): If True, save the model using safetensors. If False, save the model
                the traditional (but unsafe) PyTorch way.
        """
        self.save(
            path,
            model_name=model_name,
            create_model_card=create_model_card,
            train_datasets=train_datasets,
            safe_serialization=safe_serialization,
        )

    def _create_model_card(
        self, path: str, model_name: str | None = None, train_datasets: list[str] | None = "deprecated"
    ) -> None:
        """
        Create an automatic model and stores it in the specified path. If no training was done and the loaded model
        was a Sentence Transformer model already, then its model card is reused.

        Args:
            path (str): The path where the model card will be stored.
            model_name (Optional[str], optional): The name of the model. Defaults to None.
            train_datasets (Optional[List[str]], optional): Deprecated argument. Defaults to "deprecated".

        Returns:
            None
        """
        if model_name:
            model_path = Path(model_name)
            if not model_path.exists() and not self.model_card_data.model_id:
                self.model_card_data.model_id = model_name

        # If we loaded a Sentence Transformer model from the Hub, and no training was done, then
        # we don't generate a new model card, but reuse the old one instead.
        if self._model_card_text and "generated_from_trainer" not in self.model_card_data.tags:
            model_card = self._model_card_text
            if self.model_card_data.model_id:
                # If the original model card was saved without a model_id, we replace the model_id with the new model_id
                model_card = model_card.replace(
                    'model = SentenceTransformer("sentence_transformers_model_id"',
                    f'model = SentenceTransformer("{self.model_card_data.model_id}"',
                )
        else:
            try:
                model_card = generate_model_card(self)
            except Exception:
                logger.error(
                    f"Error while generating model card:\n{traceback.format_exc()}"
                    "Consider opening an issue on https://github.com/UKPLab/sentence-transformers/issues with this traceback.\n"
                    "Skipping model card creation."
                )
                return

        with open(os.path.join(path, "README.md"), "w", encoding="utf8") as fOut:
            fOut.write(model_card)

    @save_to_hub_args_decorator
    def save_to_hub(
        self,
        repo_id: str,
        organization: str | None = None,
        token: str | None = None,
        private: bool | None = None,
        safe_serialization: bool = True,
        commit_message: str = "Add new SentenceTransformer model.",
        local_model_path: str | None = None,
        exist_ok: bool = False,
        replace_model_card: bool = False,
        train_datasets: list[str] | None = None,
    ) -> str:
        """
        DEPRECATED, use `push_to_hub` instead.

        Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.

        Args:
            repo_id (str): Repository name for your model in the Hub, including the user or organization.
            token (str, optional): An authentication token (See https://huggingface.co/settings/token)
            private (bool, optional): Set to true, for hosting a private model
            safe_serialization (bool, optional): If true, save the model using safetensors. If false, save the model the traditional PyTorch way
            commit_message (str, optional): Message to commit while pushing.
            local_model_path (str, optional): Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
            exist_ok (bool, optional): If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
            replace_model_card (bool, optional): If true, replace an existing model card in the hub with the automatically created model card
            train_datasets (List[str], optional): Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.

        Returns:
            str: The url of the commit of your model in the repository on the Hugging Face Hub.
        """
        logger.warning(
            "The `save_to_hub` method is deprecated and will be removed in a future version of SentenceTransformers."
            " Please use `push_to_hub` instead for future model uploads."
        )

        if organization:
            if "/" not in repo_id:
                logger.warning(
                    f'Providing an `organization` to `save_to_hub` is deprecated, please use `repo_id="{organization}/{repo_id}"` instead.'
                )
                repo_id = f"{organization}/{repo_id}"
            elif repo_id.split("/")[0] != organization:
                raise ValueError(
                    "Providing an `organization` to `save_to_hub` is deprecated, please only use `repo_id`."
                )
            else:
                logger.warning(
                    f'Providing an `organization` to `save_to_hub` is deprecated, please only use `repo_id="{repo_id}"` instead.'
                )

        return self.push_to_hub(
            repo_id=repo_id,
            token=token,
            private=private,
            safe_serialization=safe_serialization,
            commit_message=commit_message,
            local_model_path=local_model_path,
            exist_ok=exist_ok,
            replace_model_card=replace_model_card,
            train_datasets=train_datasets,
        )

    def push_to_hub(
        self,
        repo_id: str,
        token: str | None = None,
        private: bool | None = None,
        safe_serialization: bool = True,
        commit_message: str | None = None,
        local_model_path: str | None = None,
        exist_ok: bool = False,
        replace_model_card: bool = False,
        train_datasets: list[str] | None = None,
        revision: str | None = None,
        create_pr: bool = False,
    ) -> str:
        """
        Uploads all elements of this Sentence Transformer to a new HuggingFace Hub repository.

        Args:
            repo_id (str): Repository name for your model in the Hub, including the user or organization.
            token (str, optional): An authentication token (See https://huggingface.co/settings/token)
            private (bool, optional): Set to true, for hosting a private model
            safe_serialization (bool, optional): If true, save the model using safetensors. If false, save the model the traditional PyTorch way
            commit_message (str, optional): Message to commit while pushing.
            local_model_path (str, optional): Path of the model locally. If set, this file path will be uploaded. Otherwise, the current model will be uploaded
            exist_ok (bool, optional): If true, saving to an existing repository is OK. If false, saving only to a new repository is possible
            replace_model_card (bool, optional): If true, replace an existing model card in the hub with the automatically created model card
            train_datasets (List[str], optional): Datasets used to train the model. If set, the datasets will be added to the model card in the Hub.
            revision (str, optional): Branch to push the uploaded files to
            create_pr (bool, optional): If True, create a pull request instead of pushing directly to the main branch

        Returns:
            str: The url of the commit of your model in the repository on the Hugging Face Hub.
        """
        api = HfApi(token=token)
        repo_url = api.create_repo(
            repo_id=repo_id,
            private=private,
            repo_type=None,
            exist_ok=exist_ok or create_pr,
        )
        repo_id = repo_url.repo_id  # Update the repo_id in case the old repo_id didn't contain a user or organization
        self.model_card_data.set_model_id(repo_id)
        if revision is not None:
            api.create_branch(repo_id=repo_id, branch=revision, exist_ok=True)

        if commit_message is None:
            backend = self.get_backend()
            if backend == "torch":
                commit_message = "Add new SentenceTransformer model"
            else:
                commit_message = f"Add new SentenceTransformer model with an {backend} backend"

        commit_description = ""
        if create_pr:
            commit_description = f"""\
Hello!

*This pull request has been automatically generated from the [`push_to_hub`](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html#sentence_transformers.SentenceTransformer.push_to_hub) method from the Sentence Transformers library.*

## Full Model Architecture:
```
{self}
```

## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import SentenceTransformer

# TODO: Fill in the PR number
pr_number = 2
model = SentenceTransformer(
    "{repo_id}",
    revision=f"refs/pr/{{pr_number}}",
    backend="{self.get_backend()}",
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
```
"""

        if local_model_path:
            folder_url = api.upload_folder(
                repo_id=repo_id,
                folder_path=local_model_path,
                commit_message=commit_message,
                commit_description=commit_description,
                revision=revision,
                create_pr=create_pr,
            )
        else:
            with tempfile.TemporaryDirectory() as tmp_dir:
                create_model_card = replace_model_card or not os.path.exists(os.path.join(tmp_dir, "README.md"))
                self.save_pretrained(
                    tmp_dir,
                    model_name=repo_url.repo_id,
                    create_model_card=create_model_card,
                    train_datasets=train_datasets,
                    safe_serialization=safe_serialization,
                )
                folder_url = api.upload_folder(
                    repo_id=repo_id,
                    folder_path=tmp_dir,
                    commit_message=commit_message,
                    commit_description=commit_description,
                    revision=revision,
                    create_pr=create_pr,
                )

        if create_pr:
            return folder_url.pr_url
        return folder_url.commit_url

    # def _text_length(self, text: list[int] | list[list[int]]) -> int:
        # """
        # Help function to get the length for the input text. Text can be either
        # a list of ints (which means a single text as input), or a tuple of list of ints
        # (representing several text inputs to the model).
        # """

        # if isinstance(text, dict):  # {key: value} case
            # return len(next(iter(text.values())))
        # elif not hasattr(text, "__len__"):  # Object has no len() method
            # return 1
        # elif len(text) == 0 or isinstance(text[0], int):  # Empty string or list of ints
            # return len(text)
        # else:
            # return sum([len(t) for t in text])  # Sum of length of individual strings

    # custom method that's more flexible and expansive
    def _text_length(self, text: str | list[int] | list[list[int]]) -> int:
        """
        Help function to get the length for the input text. Text can be either
        a list of ints (which means a single text as input), or a tuple of list of ints
        (representing several text inputs to the model).
        """
        if isinstance(text, str):  # Handle string input directly
            return len(text)
        elif isinstance(text, dict):  # {key: value} case
            return len(next(iter(text.values())))
        elif not hasattr(text, "__len__"):  # Object has no len() method
            return 1
        elif len(text) == 0 or isinstance(text[0], int):  # Empty string or list of ints
            return len(text)
        else:
            return sum([len(t) for t in text])  # Sum of length of individual strings

    def evaluate(self, evaluator: SentenceEvaluator, output_path: str = None) -> dict[str, float] | float:
        """
        Evaluate the model based on an evaluator

        Args:
            evaluator (SentenceEvaluator): The evaluator used to evaluate the model.
            output_path (str, optional): The path where the evaluator can write the results. Defaults to None.

        Returns:
            The evaluation results.
        """
        if output_path is not None:
            os.makedirs(output_path, exist_ok=True)
        return evaluator(self, output_path)

    def _load_auto_model(
        self,
        model_name_or_path: str,
        token: bool | str | None,
        cache_folder: str | None,
        revision: str | None = None,
        trust_remote_code: bool = False,
        local_files_only: bool = False,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
    ) -> list[nn.Module]:
        """
        Creates a simple Transformer + Mean Pooling model and returns the modules

        Args:
            model_name_or_path (str): The name or path of the pre-trained model.
            token (Optional[Union[bool, str]]): The token to use for the model.
            cache_folder (Optional[str]): The folder to cache the model.
            revision (Optional[str], optional): The revision of the model. Defaults to None.
            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
            local_files_only (bool, optional): Whether to use only local files. Defaults to False.
            model_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the model. Defaults to None.
            tokenizer_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the tokenizer. Defaults to None.
            config_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the config. Defaults to None.

        Returns:
            List[nn.Module]: A list containing the transformer model and the pooling model.
        """
        logger.warning(
            f"No sentence-transformers model found with name {model_name_or_path}. Creating a new one with mean pooling."
        )

        shared_kwargs = {
            "token": token,
            "trust_remote_code": trust_remote_code,
            "revision": revision,
            "local_files_only": local_files_only,
        }
        model_kwargs = shared_kwargs if model_kwargs is None else {**shared_kwargs, **model_kwargs}
        tokenizer_kwargs = shared_kwargs if tokenizer_kwargs is None else {**shared_kwargs, **tokenizer_kwargs}
        config_kwargs = shared_kwargs if config_kwargs is None else {**shared_kwargs, **config_kwargs}

        transformer_model = Transformer(
            model_name_or_path,
            cache_dir=cache_folder,
            model_args=model_kwargs,
            tokenizer_args=tokenizer_kwargs,
            config_args=config_kwargs,
            backend=self.backend,
        )
        pooling_model = Pooling(transformer_model.get_word_embedding_dimension(), "mean")
        if not local_files_only:
            self.model_card_data.set_base_model(model_name_or_path, revision=revision)
        return [transformer_model, pooling_model]

    def _load_module_class_from_ref(
        self,
        class_ref: str,
        model_name_or_path: str,
        trust_remote_code: bool,
        revision: str | None,
        model_kwargs: dict[str, Any] | None,
    ) -> nn.Module:
        # If the class is from sentence_transformers, we can directly import it,
        # otherwise, we try to import it dynamically, and if that fails, we fall back to the default import
        if class_ref.startswith("sentence_transformers."):
            return import_from_string(class_ref)

        if trust_remote_code or os.path.exists(model_name_or_path):
            code_revision = model_kwargs.pop("code_revision", None) if model_kwargs else None
            try:
                return get_class_from_dynamic_module(
                    class_ref,
                    model_name_or_path,
                    revision=revision,
                    code_revision=code_revision,
                )
            except (OSError, ValueError):
                # Ignore the error if 1) the file does not exist, or 2) the class_ref is not correctly formatted/found
                pass

        return import_from_string(class_ref)

    def _load_sbert_model(
        self,
        model_name_or_path: str,
        token: bool | str | None,
        cache_folder: str | None,
        revision: str | None = None,
        trust_remote_code: bool = False,
        local_files_only: bool = False,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
    ) -> dict[str, nn.Module]:
        """
        Loads a full SentenceTransformer model using the modules.json file.

        Args:
            model_name_or_path (str): The name or path of the pre-trained model.
            token (Optional[Union[bool, str]]): The token to use for the model.
            cache_folder (Optional[str]): The folder to cache the model.
            revision (Optional[str], optional): The revision of the model. Defaults to None.
            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
            local_files_only (bool, optional): Whether to use only local files. Defaults to False.
            model_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the model. Defaults to None.
            tokenizer_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the tokenizer. Defaults to None.
            config_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the config. Defaults to None.

        Returns:
            OrderedDict[str, nn.Module]: An ordered dictionary containing the modules of the model.
        """
        # Check if the config_sentence_transformers.json file exists (exists since v2 of the framework)
        config_sentence_transformers_json_path = load_file_path(
            model_name_or_path,
            "config_sentence_transformers.json",
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
        )
        if config_sentence_transformers_json_path is not None:
            with open(config_sentence_transformers_json_path) as fIn:
                self._model_config = json.load(fIn)

            if (
                "__version__" in self._model_config
                and "sentence_transformers" in self._model_config["__version__"]
                and version.parse(self._model_config["__version__"]["sentence_transformers"])
                > version.parse(__version__)
            ):
                logger.warning(
                    f'You are trying to use a model that was created with Sentence Transformers version {self._model_config["__version__"]["sentence_transformers"]}, '
                    f"but you're currently using version {__version__}. This might cause unexpected behavior or errors. "
                    "In that case, try to update to the latest version."
                )

            # Set score functions & prompts if not already overridden by the __init__ calls
            if self._similarity_fn_name is None:
                self.similarity_fn_name = self._model_config.get("similarity_fn_name", None)
            if not self.prompts:
                self.prompts = self._model_config.get("prompts", {})
            if not self.default_prompt_name:
                self.default_prompt_name = self._model_config.get("default_prompt_name", None)

        # Check if a readme exists
        model_card_path = load_file_path(
            model_name_or_path,
            "README.md",
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
        )
        if model_card_path is not None:
            try:
                with open(model_card_path, encoding="utf8") as fIn:
                    self._model_card_text = fIn.read()
            except Exception:
                pass

        # Load the modules of sentence transformer
        modules_json_path = load_file_path(
            model_name_or_path,
            "modules.json",
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
        )
        with open(modules_json_path) as fIn:
            modules_config = json.load(fIn)

        modules = OrderedDict()
        module_kwargs = OrderedDict()
        for module_config in modules_config:
            class_ref = module_config["type"]
            module_class = self._load_module_class_from_ref(
                class_ref, model_name_or_path, trust_remote_code, revision, model_kwargs
            )

            # For Transformer, don't load the full directory, rely on `transformers` instead
            # But, do load the config file first.
            if module_config["path"] == "":
                kwargs = {}
                for config_name in [
                    "sentence_bert_config.json",
                    "sentence_roberta_config.json",
                    "sentence_distilbert_config.json",
                    "sentence_camembert_config.json",
                    "sentence_albert_config.json",
                    "sentence_xlm-roberta_config.json",
                    "sentence_xlnet_config.json",
                ]:
                    config_path = load_file_path(
                        model_name_or_path,
                        config_name,
                        token=token,
                        cache_folder=cache_folder,
                        revision=revision,
                        local_files_only=local_files_only,
                    )
                    if config_path is not None:
                        with open(config_path) as fIn:
                            kwargs = json.load(fIn)
                            # Don't allow configs to set trust_remote_code
                            if "model_args" in kwargs and "trust_remote_code" in kwargs["model_args"]:
                                kwargs["model_args"].pop("trust_remote_code")
                            if "tokenizer_args" in kwargs and "trust_remote_code" in kwargs["tokenizer_args"]:
                                kwargs["tokenizer_args"].pop("trust_remote_code")
                            if "config_args" in kwargs and "trust_remote_code" in kwargs["config_args"]:
                                kwargs["config_args"].pop("trust_remote_code")
                        break

                hub_kwargs = {
                    "token": token,
                    "trust_remote_code": trust_remote_code,
                    "revision": revision,
                    "local_files_only": local_files_only,
                }
                # 3rd priority: config file
                if "model_args" not in kwargs:
                    kwargs["model_args"] = {}
                if "tokenizer_args" not in kwargs:
                    kwargs["tokenizer_args"] = {}
                if "config_args" not in kwargs:
                    kwargs["config_args"] = {}

                # 2nd priority: hub_kwargs
                kwargs["model_args"].update(hub_kwargs)
                kwargs["tokenizer_args"].update(hub_kwargs)
                kwargs["config_args"].update(hub_kwargs)

                # 1st priority: kwargs passed to SentenceTransformer
                if model_kwargs:
                    kwargs["model_args"].update(model_kwargs)
                if tokenizer_kwargs:
                    kwargs["tokenizer_args"].update(tokenizer_kwargs)
                if config_kwargs:
                    kwargs["config_args"].update(config_kwargs)

                # Try to initialize the module with a lot of kwargs, but only if the module supports them
                # Otherwise we fall back to the load method
                try:
                    module = module_class(model_name_or_path, cache_dir=cache_folder, backend=self.backend, **kwargs)
                except TypeError:
                    module = module_class.load(model_name_or_path)
            else:
                # Normalize does not require any files to be loaded
                if module_class == Normalize:
                    module_path = None
                else:
                    module_path = load_dir_path(
                        model_name_or_path,
                        module_config["path"],
                        token=token,
                        cache_folder=cache_folder,
                        revision=revision,
                        local_files_only=local_files_only,
                    )
                module = module_class.load(module_path)

            modules[module_config["name"]] = module
            module_kwargs[module_config["name"]] = module_config.get("kwargs", [])

        if revision is None:
            path_parts = Path(modules_json_path)
            if len(path_parts.parts) >= 2:
                revision_path_part = Path(modules_json_path).parts[-2]
                if len(revision_path_part) == 40:
                    revision = revision_path_part
        if not local_files_only:
            self.model_card_data.set_base_model(model_name_or_path, revision=revision)
        return modules, module_kwargs

    @staticmethod
    def load(input_path) -> SentenceTransformer:
        return SentenceTransformer(input_path)

    @property
    def device(self) -> device:
        """
        Get torch.device from module, assuming that the whole module has one device.
        In case there are no PyTorch parameters, fall back to CPU.
        """
        if isinstance(self[0], Transformer):
            return self[0].auto_model.device

        try:
            return next(self.parameters()).device
        except StopIteration:
            # For nn.DataParallel compatibility in PyTorch 1.5

            def find_tensor_attributes(module: nn.Module) -> list[tuple[str, Tensor]]:
                tuples = [(k, v) for k, v in module.__dict__.items() if torch.is_tensor(v)]
                return tuples

            gen = self._named_members(get_members_fn=find_tensor_attributes)
            try:
                first_tuple = next(gen)
                return first_tuple[1].device
            except StopIteration:
                return torch.device("cpu")

    @property
    def tokenizer(self) -> Any:
        """
        Property to get the tokenizer that is used by this model
        """
        return self._first_module().tokenizer

    @tokenizer.setter
    def tokenizer(self, value) -> None:
        """
        Property to set the tokenizer that should be used by this model
        """
        self._first_module().tokenizer = value

    @property
    def max_seq_length(self) -> int:
        """
        Returns the maximal input sequence length for the model. Longer inputs will be truncated.

        Returns:
            int: The maximal input sequence length.

        Example:
            ::

                from sentence_transformers import SentenceTransformer

                model = SentenceTransformer("all-mpnet-base-v2")
                print(model.max_seq_length)
                # => 384
        """
        return self._first_module().max_seq_length

    @max_seq_length.setter
    def max_seq_length(self, value) -> None:
        """
        Property to set the maximal input sequence length for the model. Longer inputs will be truncated.
        """
        self._first_module().max_seq_length = value

    @property
    def _target_device(self) -> torch.device:
        logger.warning(
            "`SentenceTransformer._target_device` has been deprecated, please use `SentenceTransformer.device` instead.",
        )
        return self.device

    @_target_device.setter
    def _target_device(self, device: int | str | torch.device | None = None) -> None:
        self.to(device)

    @property
    def _no_split_modules(self) -> list[str]:
        try:
            return self._first_module()._no_split_modules
        except AttributeError:
            return []

    @property
    def _keys_to_ignore_on_save(self) -> list[str]:
        try:
            return self._first_module()._keys_to_ignore_on_save
        except AttributeError:
            return []

    def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=None) -> None:
        # Propagate the gradient checkpointing to the transformer model
        for module in self:
            if isinstance(module, Transformer):
                return module.auto_model.gradient_checkpointing_enable(gradient_checkpointing_kwargs)


================================================
FILE: Assets/core.py
================================================
# custom code compatible with chattts 0.2.4
# adds the "local_dir" parameter

import os
import re
import logging
import tempfile
from dataclasses import dataclass, asdict
from typing import Literal, Optional, List, Tuple, Dict, Union
from json import load
from pathlib import Path

import numpy as np
import torch
from vocos import Vocos
from vocos.pretrained import instantiate_class
from huggingface_hub import snapshot_download

from .config import Config
from .model import DVAE, Embed, GPT, gen_logits, Tokenizer, Speaker
from .utils import (
    load_safetensors,
    check_all_assets,
    download_all_assets,
    select_device,
    get_latest_modified_file,
    del_all,
)
from .utils import logger as utils_logger

from .norm import Normalizer


class Chat:
    def __init__(self, logger=logging.getLogger(__name__)):
        self.logger = logger
        utils_logger.set_logger(logger)

        self.config = Config()

        self.normalizer = Normalizer(
            os.path.join(os.path.dirname(__file__), "res", "homophones_map.json"),
            logger,
        )
        with open(
            os.path.join(os.path.dirname(__file__), "res", "sha256_map.json")
        ) as f:
            self.sha256_map: Dict[str, str] = load(f)

        self.context = GPT.Context()

    def has_loaded(self, use_decoder=False):
        not_finish = False
        check_list = ["vocos", "gpt", "tokenizer", "embed"]

        if use_decoder:
            check_list.append("decoder")
        else:
            check_list.append("dvae")

        for module in check_list:
            if not hasattr(self, module):
                self.logger.warning(f"{module} not initialized.")
                not_finish = True

        return not not_finish

    def download_models(
        self,
        source: Literal["huggingface", "local", "custom"] = "local",
        force_redownload=False,
        custom_path: Optional[torch.serialization.FILE_LIKE] = None,
        cache_dir: Optional[str] = None,
        local_dir: Optional[str] = None,
    ) -> Optional[str]:
        if source == "local":
            download_path = local_dir if local_dir else (custom_path if custom_path is not None else os.getcwd())
            if (
                not check_all_assets(Path(download_path), self.sha256_map, update=True)
                or force_redownload
            ):
                with tempfile.TemporaryDirectory() as tmp:
                    download_all_assets(tmpdir=tmp, homedir=download_path)
                if not check_all_assets(
                    Path(download_path), self.sha256_map, update=False
                ):
                    self.logger.error(
                        "download to local path %s failed.", download_path
                    )
                    return None
        elif source == "huggingface":
            try:
                if local_dir:
                    download_path = snapshot_download(
                        repo_id="2Noise/ChatTTS",
                        allow_patterns=["*.yaml", "*.json", "*.safetensors", "spk_stat.pt", "tokenizer.pt"],
                        local_dir=local_dir,
                        force_download=force_redownload
                    )
                    if not check_all_assets(Path(download_path), self.sha256_map, update=False):
                        self.logger.error("Model verification failed")
                        return None
                elif cache_dir:
                    download_path = snapshot_download(
                        repo_id="2Noise/ChatTTS",
                        allow_patterns=["*.yaml", "*.json", "*.safetensors", "spk_stat.pt", "tokenizer.pt"],
                        cache_dir=cache_dir,
                        force_download=force_redownload
                    )
                    if not check_all_assets(Path(download_path), self.sha256_map, update=False):
                        self.logger.error("Model verification failed")
                        return None
                else:
                    try:
                        download_path = (
                            get_latest_modified_file(
                                os.path.join(
                                    os.getenv(
                                        "HF_HOME", os.path.expanduser("~/.cache/huggingface")
                                    ),
                                    "hub/models--2Noise--ChatTTS/snapshots",
                                )
                            )
                            if custom_path is None
                            else get_latest_modified_file(
                                os.path.join(custom_path, "models--2Noise--ChatTTS/snapshots")
                            )
                        )
                    except:
                        download_path = None
                    if download_path is None or force_redownload:
                        self.logger.log(
                            logging.INFO,
                            f"download from HF: https://huggingface.co/2Noise/ChatTTS",
                        )
                        try:
                            download_path = snapshot_download(
                                repo_id="2Noise/ChatTTS",
                                allow_patterns=["*.yaml", "*.json", "*.safetensors", "spk_stat.pt", "tokenizer.pt"],
                                cache_dir=custom_path,
                                force_download=force_redownload,
                            )
                        except:
                            download_path = None
                        else:
                            self.logger.log(
                                logging.INFO,
                                f"load latest snapshot from cache: {download_path}",
                            )
            except Exception as e:
                self.logger.error(f"Failed to download models: {str(e)}")
                download_path = None
        elif source == "custom":
            self.logger.log(logging.INFO, f"try to load from local: {custom_path}")
            if not check_all_assets(Path(custom_path), self.sha256_map, update=False):
                self.logger.error("check models in custom path %s failed.", custom_path)
                return None
            download_path = custom_path

        if download_path is None:
            self.logger.error("Model download failed")
            return None

        return download_path

    def load(
        self,
        source: Literal["huggingface", "local", "custom"] = "local",
        force_redownload=False,
        compile: bool = False,
        custom_path: Optional[torch.serialization.FILE_LIKE] = None,
        device: Optional[torch.device] = None,
        coef: Optional[torch.Tensor] = None,
        use_flash_attn=False,
        use_vllm=False,
        experimental: bool = False,
        cache_dir: Optional[str] = None,
        local_dir: Optional[str] = None,
    ) -> bool:
        download_path = self.download_models(
            source, 
            force_redownload, 
            custom_path, 
            cache_dir, 
            local_dir
        )
        if download_path is None:
            return False
        return self._load(
            device=device,
            compile=compile,
            coef=coef,
            use_flash_attn=use_flash_attn,
            use_vllm=use_vllm,
            experimental=experimental,
            **{
                k: os.path.join(download_path, v)
                for k, v in asdict(self.config.path).items()
            },
        )

    def unload(self):
        logger = self.logger
        self.normalizer.destroy()
        del self.normalizer
        del self.sha256_map
        del_list = ["vocos", "gpt", "decoder", "dvae", "tokenizer", "embed"]
        for module in del_list:
            if hasattr(self, module):
                delattr(self, module)
        self.__init__(logger)

    def sample_random_speaker(self) -> str:
        return self.speaker.sample_random()

    def sample_audio_speaker(self, wav: Union[np.ndarray, torch.Tensor]) -> str:
        return self.speaker.encode_prompt(self.dvae.sample_audio(wav))

    @dataclass(repr=False, eq=False)
    class RefineTextParams:
        prompt: str = ""
        top_P: float = 0.7
        top_K: int = 20
        temperature: float = 0.7
        repetition_penalty: float = 1.0
        max_new_token: int = 384
        min_new_token: int = 0
        show_tqdm: bool = True
        ensure_non_empty: bool = True
        manual_seed: Optional[int] = None

    @dataclass(repr=False, eq=False)
    class InferCodeParams(RefineTextParams):
        prompt: str = "[speed_5]"
        spk_emb: Optional[str] = None
        spk_smp: Optional[str] = None
        txt_smp: Optional[str] = None
        temperature: float = 0.3
        repetition_penalty: float = 1.05
        max_new_token: int = 2048
        stream_batch: int = 24
        stream_speed: int = 12000
        pass_first_n_batches: int = 2

    def infer(
        self,
        text,
        stream=False,
        lang=None,
        skip_refine_text=False,
        refine_text_only=False,
        use_decoder=True,
        do_text_normalization=True,
        do_homophone_replacement=True,
        split_text=True,
        max_split_batch=4,
        params_refine_text=RefineTextParams(),
        params_infer_code=InferCodeParams(),
    ):
        self.context.set(False)

        if split_text and isinstance(text, str):
            if "\n" in text:
                text = text.split("\n")
            else:
                text = re.split(r"(?<=。)|(?<=\.\s)", text)
                nt = []
                if isinstance(text, list):
                    for t in text:
                        if t:
                            nt.append(t)
                    text = nt
                else:
                    text = [text]
            self.logger.info("split text into %d parts", len(text))
            self.logger.debug("%s", str(text))

        if len(text) == 0:
            return []

        res_gen = self._infer(
            text,
            stream,
            lang,
            skip_refine_text,
            refine_text_only,
            use_decoder,
            do_text_normalization,
            do_homophone_replacement,
            split_text,
            max_split_batch,
            params_refine_text,
            params_infer_code,
        )
        if stream:
            return res_gen
        elif not refine_text_only:
            stripped_wavs = []
            thr = np.float32(1e-5)
            for wavs in res_gen:
                for wav in wavs:
                    stripped_wavs.append(wav[np.abs(wav) > thr])
            if split_text:
                return [np.concatenate(stripped_wavs)]
            return stripped_wavs
        else:
            return next(res_gen)

    def interrupt(self):
        self.context.set(True)

    @torch.no_grad()
    def _load(
        self,
        vocos_ckpt_path: str = None,
        dvae_ckpt_path: str = None,
        gpt_ckpt_path: str = None,
        embed_path: str = None,
        decoder_ckpt_path: str = None,
        tokenizer_path: str = None,
        device: Optional[torch.device] = None,
        compile: bool = False,
        coef: Optional[str] = None,
        use_flash_attn=False,
        use_vllm=False,
        experimental: bool = False,
    ):
        if device is None:
            device = select_device(experimental=experimental)
            self.logger.info("use device %s", str(device))
        self.device = device
        self.device_gpt = device if "mps" not in str(device) else torch.device("cpu")
        self.compile = compile

        feature_extractor = instantiate_class(
            args=(), init=asdict(self.config.vocos.feature_extractor)
        )
        backbone = instantiate_class(args=(), init=asdict(self.config.vocos.backbone))
        head = instantiate_class(args=(), init=asdict(self.config.vocos.head))
        vocos = (
            Vocos(feature_extractor=feature_extractor, backbone=backbone, head=head)
            .to(
                # Vocos on mps will crash, use cpu fallback.
                # Plus, complex dtype used in the decode process of Vocos is not supported in torch_npu now,
                # so we put this calculation of data on CPU instead of NPU.
                "cpu"
                if "mps" in str(device) or "npu" in str(device)
                else device
            )
            .eval()
        )
        assert vocos_ckpt_path, "vocos_ckpt_path should not be None"
        vocos.load_state_dict(load_safetensors(vocos_ckpt_path))
        self.vocos = vocos
        self.logger.log(logging.INFO, "vocos loaded.")

        # computation of MelSpectrogram on npu is not support now, use cpu fallback.
        dvae_device = torch.device("cpu") if "npu" in str(self.device) else device
        dvae = DVAE(
            decoder_config=asdict(self.config.dvae.decoder),
            encoder_config=asdict(self.config.dvae.encoder),
            vq_config=asdict(self.config.dvae.vq),
            dim=self.config.dvae.decoder.idim,
            coef=coef,
            device=dvae_device,
        )
        coef = str(dvae)
        assert dvae_ckpt_path, "dvae_ckpt_path should not be None"
        dvae.load_pretrained(dvae_ckpt_path, dvae_device)
        self.dvae = dvae.eval()
        self.logger.log(logging.INFO, "dvae loaded.")

        embed = Embed(
            self.config.embed.hidden_size,
            self.config.embed.num_audio_tokens,
            self.config.embed.num_text_tokens,
            self.config.embed.num_vq,
        )
        embed.load_pretrained(embed_path, device=device)
        self.embed = embed.to(device)
        self.logger.log(logging.INFO, "embed loaded.")

        gpt = GPT(
            gpt_config=asdict(self.config.gpt),
            embed=self.embed,
            use_flash_attn=use_flash_attn,
            use_vllm=use_vllm,
            device=device,
            device_gpt=self.device_gpt,
            logger=self.logger,
        ).eval()
        assert gpt_ckpt_path, "gpt_ckpt_path should not be None"
        gpt.load_pretrained(gpt_ckpt_path, embed_path, experimental=experimental)
        gpt.prepare(compile=compile and "cuda" in str(device))
        self.gpt = gpt
        self.logger.log(logging.INFO, "gpt loaded.")

        self.speaker = Speaker(
            self.config.gpt.hidden_size, self.config.spk_stat, device
        )
        self.logger.log(logging.INFO, "speaker loaded.")

        decoder = DVAE(
            decoder_config=asdict(self.config.decoder),
            dim=self.config.decoder.idim,
            coef=coef,
            device=device,
        )
        coef = str(decoder)
        assert decoder_ckpt_path, "decoder_ckpt_path should not be None"
        decoder.load_pretrained(decoder_ckpt_path, device)
        self.decoder = decoder.eval()
        self.logger.log(logging.INFO, "decoder loaded.")

        if tokenizer_path:
            self.tokenizer = Tokenizer(tokenizer_path)
            self.logger.log(logging.INFO, "tokenizer loaded.")

        self.coef = coef

        return self.has_loaded()

    def _infer(
        self,
        text: Union[List[str], str],
        stream=False,
        lang=None,
        skip_refine_text=False,
        refine_text_only=False,
        use_decoder=True,
        do_text_normalization=True,
        do_homophone_replacement=True,
        split_text=True,
        max_split_batch=4,
        params_refine_text=RefineTextParams(),
        params_infer_code=InferCodeParams(),
    ):

        assert self.has_loaded(use_decoder=use_decoder)

        if not isinstance(text, list):
            text = [text]

        text = [
            self.normalizer(
                t,
                do_text_normalization,
                do_homophone_replacement,
                lang,
            )
            for t in text
        ]

        self.logger.debug("normed texts %s", str(text))

        if not skip_refine_text:
            refined = self._refine_text(
                text,
                self.device,
                params_refine_text,
            )
            text_tokens = refined.ids
            text_tokens = [i[i.less(self.tokenizer.break_0_ids)] for i in text_tokens]
            text = self.tokenizer.decode(text_tokens)
            refined.destroy()
            if refine_text_only:
                if split_text and isinstance(text, list):
                    text = "\n".join(text)
                yield text
                return

        if split_text and len(text) > 1 and params_infer_code.spk_smp is None:
            refer_text = text[0]
            result = next(
                self._infer_code(
                    refer_text,
                    False,
                    self.device,
                    use_decoder,
                    params_infer_code,
                )
            )
            wavs = self._decode_to_wavs(
                result.hiddens if use_decoder else result.ids,
                use_decoder,
            )
            result.destroy()
            assert len(wavs), 1
            params_infer_code.spk_smp = self.sample_audio_speaker(wavs[0])
            params_infer_code.txt_smp = refer_text

        if stream:
            length = 0
            pass_batch_count = 0
        if split_text:
            n = len(text) // max_split_batch
            if len(text) % max_split_batch:
                n += 1
        else:
            n = 1
            max_split_batch = len(text)
        for i in range(n):
            text_remain = text[i * max_split_batch :]
            if len(text_remain) > max_split_batch:
                text_remain = text_remain[:max_split_batch]
            if split_text:
                self.logger.info(
                    "infer split %d~%d",
                    i * max_split_batch,
                    i * max_split_batch + len(text_remain),
                )
            for result in self._infer_code(
                text_remain,
                stream,
                self.device,
                use_decoder,
                params_infer_code,
            ):
                wavs = self._decode_to_wavs(
                    result.hiddens if use_decoder else result.ids,
                    use_decoder,
                )
                result.destroy()
                if stream:
                    pass_batch_count += 1
                    if pass_batch_count <= params_infer_code.pass_first_n_batches:
                        continue
                    a = length
                    b = a + params_infer_code.stream_speed
                    if b > wavs.shape[1]:
                        b = wavs.shape[1]
                    new_wavs = wavs[:, a:b]
                    length = b
                    yield new_wavs
                else:
                    yield wavs
            if stream:
                new_wavs = wavs[:, length:]
                keep_cols = np.sum(np.abs(new_wavs) > 1e-5, axis=0) > 0
                yield new_wavs[:][:, keep_cols]

    @torch.inference_mode()
    def _vocos_decode(self, spec: torch.Tensor) -> np.ndarray:
        if "mps" in str(self.device) or "npu" in str(self.device):
            return self.vocos.decode(spec.cpu()).cpu().numpy()
        else:
            return self.vocos.decode(spec).cpu().numpy()

    @torch.inference_mode()
    def _decode_to_wavs(
        self,
        result_list: List[torch.Tensor],
        use_decoder: bool,
    ):
        decoder = self.decoder if use_decoder else self.dvae
        max_x_len = -1
        if len(result_list) == 0:
            return np.array([], dtype=np.float32)
        for result in result_list:
            if result.size(0) > max_x_len:
                max_x_len = result.size(0)
        batch_result = torch.zeros(
            (len(result_list), result_list[0].size(1), max_x_len),
            dtype=result_list[0].dtype,
            device=result_list[0].device,
        )
        for i in range(len(result_list)):
            src = result_list[i]
            batch_result[i].narrow(1, 0, src.size(0)).copy_(src.permute(1, 0))
            del src
        del_all(result_list)
        mel_specs = decoder(batch_result)
        del batch_result
        wavs = self._vocos_decode(mel_specs)
        del mel_specs
        return wavs

    @torch.no_grad()
    def _infer_code(
        self,
        text: Tuple[List[str], str],
        stream: bool,
        device: torch.device,
        return_hidden: bool,
        params: InferCodeParams,
    ):

        gpt = self.gpt

        if not isinstance(text, list):
            text = [text]

        assert len(text), "text should not be empty"

        if not isinstance(params.temperature, list):
            temperature = [params.temperature] * self.config.gpt.num_vq
        else:
            temperature = params.temperature

        input_ids, attention_mask, text_mask = self.tokenizer.encode(
            self.speaker.decorate_code_prompts(
                text,
                params.prompt,
                params.txt_smp,
                params.spk_emb,
            ),
            self.config.gpt.num_vq,
            prompt=(
                self.speaker.decode_prompt(params.spk_smp)
                if params.spk_smp is not None
                else None
            ),
            device=self.device_gpt,
        )
        start_idx = input_ids.shape[-2]

        num_code = self.config.gpt.num_audio_tokens - 1

        logits_warpers, logits_processors = gen_logits(
            num_code=num_code,
            top_P=params.top_P,
            top_K=params.top_K,
            repetition_penalty=params.repetition_penalty,
        )

        if gpt.is_vllm:
            from .model.velocity import SamplingParams

            sample_params = SamplingParams(
                temperature=temperature,
                max_new_token=params.max_new_token,
                max_tokens=8192,
                min_new_token=params.min_new_token,
                logits_processors=(logits_processors, logits_warpers),
                eos_token=num_code,
                infer_text=False,
                start_idx=start_idx,
            )
            input_ids = [i.tolist() for i in input_ids]

            result = gpt.llm.generate(
                None,
                sample_params,
                input_ids,
            )

            token_ids = []
            hidden_states = []
            for i in result:
                token_ids.append(torch.tensor(i.outputs[0].token_ids))
                hidden_states.append(
                    i.outputs[0].hidden_states.to(torch.float32).to(self.device)
                )

            del text_mask, input_ids

            return [
                GPT.GenerationOutputs(
                    ids=token_ids,
                    hiddens=hidden_states,
                    attentions=[],
                ),
            ]

        emb = self.embed(input_ids, text_mask)

        del text_mask

        if params.spk_emb is not None:
            self.speaker.apply(
                emb,
                params.spk_emb,
                input_ids,
                self.tokenizer.spk_emb_ids,
                self.gpt.device_gpt,
            )

        result = gpt.generate(
            emb,
            input_ids,
            temperature=torch.tensor(temperature, device=device),
            eos_token=num_code,
            attention_mask=attention_mask,
            max_new_token=params.max_new_token,
            min_new_token=params.min_new_token,
            logits_processors=(*logits_processors, *logits_warpers),
            infer_text=False,
            return_hidden=return_hidden,
            stream=stream,
            show_tqdm=params.show_tqdm,
            ensure_non_empty=params.ensure_non_empty,
            stream_batch=params.stream_batch,
            manual_seed=params.manual_seed,
            context=self.context,
        )

        del emb, input_ids

        return result

    @torch.no_grad()
    def _refine_text(
        self,
        text: str,
        device: torch.device,
        params: RefineTextParams,
    ):

        gpt = self.gpt

        if not isinstance(text, list):
            text = [text]

        input_ids, attention_mask, text_mask = self.tokenizer.encode(
            self.speaker.decorate_text_prompts(text, params.prompt),
            self.config.gpt.num_vq,
            device=self.device_gpt,
        )

        logits_warpers, logits_processors = gen_logits(
            num_code=self.tokenizer.len,
            top_P=params.top_P,
            top_K=params.top_K,
            repetition_penalty=params.repetition_penalty,
        )

        if gpt.is_vllm:
            from .model.velocity import SamplingParams

            sample_params = SamplingParams(
                repetition_penalty=params.repetition_penalty,
                temperature=params.temperature,
                top_p=params.top_P,
                top_k=params.top_K,
                max_new_token=params.max_new_token,
                max_tokens=8192,
                min_new_token=params.min_new_token,
                logits_processors=(logits_processors, logits_warpers),
                eos_token=self.tokenizer.eos_token,
                infer_text=True,
                start_idx=input_ids.shape[-2],
            )
            input_ids_list = [i.tolist() for i in input_ids]
            del input_ids

            result = gpt.llm.generate(
                None, sample_params, input_ids_list, params.show_tqdm
            )
            token_ids = []
            hidden_states = []
            for i in result:
                token_ids.append(torch.tensor(i.outputs[0].token_ids))
                hidden_states.append(i.outputs[0].hidden_states)

            del text_mask, input_ids_list, result

            return GPT.GenerationOutputs(
                ids=token_ids,
                hiddens=hidden_states,
                attentions=[],
            )

        emb = self.embed(input_ids, text_mask)

        del text_mask

        result = next(
            gpt.generate(
                emb,
                input_ids,
                temperature=torch.tensor([params.temperature], device=device),
                eos_token=self.tokenizer.eos_token,
                attention_mask=attention_mask,
                max_new_token=params.max_new_token,
                min_new_token=params.min_new_token,
                logits_processors=(*logits_processors, *logits_warpers),
                infer_text=True,
                stream=False,
                show_tqdm=params.show_tqdm,
                ensure_non_empty=params.ensure_non_empty,
                manual_seed=params.manual_seed,
                context=self.context,
            )
        )

        del emb, input_ids

        return result

================================================
FILE: Assets/user_manual_consolidated.md
================================================
### What is the VectorDB-Plugin and what can it do?
VectorDB-Plugin is a program that lets you build a vector database from your documents (text files, PDFs, images, etc.) and use it
with a large language model for more accurate answers. This approach is known as Retrieval Augmented Generation (RAG) – the software
finds relevant pieces of your data (embeddings) and feeds them into an AI chat model so the answers are based on your own content.
In simple terms, VectorDB-Plugin "supercharges" a language model by giving it a memory of your files, which improves the factual
accuracy of responses. You can search your database by asking questions in plain language, and the program will retrieve matching
chunks from your data and have the chat model incorporate them into its answer.

### What are the system requirements and prerequisites?
System Requirements for VectorDB-Plugin include a Windows operating system (Windows 10 or 11) and Python (version 3.11 or 3.12 is
recommended). You should also have Git installed (with Git LFS for handling large model files) and Pandoc (a document converter).
If you plan to use GPU acceleration or certain models, you'll need a suitable C++ compiler and possibly Visual Studio build tools
on Windows. An NVIDIA GPU is optional but can greatly speed up embedding and model inference (the program will also work on CPU,
just more slowly). Make sure you have sufficient disk space for storing models and databases – vector models and chat models can
be several hundred MBs to a few GBs each.

### Why is Visual Studio required to run this program?
Visual Studio is requried to run this program because some of the libraries that it relies on must be compiled before they can be
installed.  A common order that you will receive if you have not installed Visual Studio will state that
"Microsoft Visual C++ 14.0 or greater is required" making it clear that you have not installed it correctly. Moreover, when
installing Visual Studio you must also install "Build Tools" or select certain features.  For example, when installing
Visual Studio Build Tools 2022 you must choose "Desktop development with C++ workload" from the righthand side and check the boxes
for "MSVC v143 – VS 2022 C++ x64/x86 build tools...", "Windows 10 SDK (10.0.19041.0 or later)," or "Windows 11 SDK (10.0.22621.0),"
"C++ CMake tools for Windows," "C++ CMake tools for Windows," "C++ AddressSanitizer," and potentially others.

### How do I install and launch the VectorDB-Plugin?
Download the latest release from the GitHub repository (look for a ZIP file under Releases). Extract the ZIP archive to a folder of
your choice.  Create a virtual environment by opening a command prompt within the "src" directory of the extracted files by running
the command "python -m venv ." The second step is to activate the virtual environment by running the command ".\Scripts\activate".
Third, run the setup script with the command "python setup_windows.py". It is important to note that this progam is only supported
on Windows at this time.  Lastly, you run the program by using the command "python gui.py". A window should open with this program's
graphical user interface.

### How do I download or add embedding models?
The Models Tab lets you browse and download embedding models.  Models are grouped by providers with properties listed for each
embedding model.  To download a model, click the radio button next to the modle you want to download and then click
"Download Selected Model".  This will save the necessary model files to the "Models/Vector/" folder if you want to inspect them. The
Original Precision of an embedding model is the original floating point format that a model was saved to by the creator - e.g. float32,
float16 etc. The Parameters of an embedding model refers to how many parameters a particular model has - e.g. 109m means 109 million
parameters. The Dimensions of an embedding model refers to how complex of embeddings that a particular model created.  More complexity
means the higher quality generally within the same embedding model family.  For example, dimensions such as 768 or 1024. The Max
Sequence of an embedding model refers to the maximum amount of tokens that an embedding model can process at a given time.  The size
of a model refers to the size on disk.

### How do I query the database for answers?
Select the database you want to query from the dropdown menu. Choose a backend model for answering Local Models built-in AI Kobold
LM Studio or ChatGPT each option uses different AI systems to generate responses. Enter your question in natural language in the
text box for example what does the quarterly report say about revenue. If you only want to see the retrieved information without
AI processing check the chunks only box. Click Submit Question the system searches your database for relevant content using semantic
similarity. The results will display both the retrieved chunks so you can verify sources and a complete answer generated by your
chosen AI model based on those chunks. You can continue with follow-up questions or new queries as needed.

### Which chat backend should I use?
The program offers four options for generating answers from your database content. The Local Models backend uses chat models downloaded
directly from Huggingface and does not rely on any exernal program. The Kobold backend connects to a Kobold server that has already
loaded a chat model.  You must download Kobold prior to using this backend and set it up correctly.  The LM Studio backend is similar
in that it requires downloading an external program prior to using it and setting it up correctly.  The ChatGPT backed uses the API
from Openai and connects to one of several models. You must first create an account with Openai and get an API key, which must then
be entered into this program from the menu at the top.  Unlike the other backends, the ChatGPT backend cannot run without an Internet
connection.

### What is LM Studio chat model backend?
LM Studio is an application that allows users to run and interact with local language models on their own hardware. This program
integrates with LM Studio, and the GitHub repository contains detailed instructions for setup and usage. When you query the vector
database within the Query Database tab you can choose LM Studio as the backend that ultimately receives the query (along with the
contexts from the vector database) and provides a response to your question.  LM Studio can be downloaded from this website:
https://lmstudio.ai/.  The documentation regarding how to properly set up the program is here: https://lmstudio.ai/docs/app.

### What is Kobold chat model backend?
Kobold is an application that allows users to run and interact with local language models on their own hardware. This program
integrates with Kobold, and the GitHub repository contains detailed instructions for setup and usage. When you query the vector
database within the Query Database tab you can choose Kobold as the backend that ultimately receives the query (along with the
contexts from the vector database) and provides a response to your question.  You can get the latest release from Kobold from this
website: https://github.com/LostRuins/koboldcpp.  On Windows machines, it is crucial that you do two things before using Kobold.  First,
right-click on the file and check the "Unblock" checkbox near the bottom.  Secondly, you must click the "Compatibility" tab and check
the box that says "Run this program as an administrator."  Without these steps it will likely fail.  The documentation regarding how
to use Kobold is here: https://github.com/LostRuins/koboldcpp/wiki.

### What is the OpenAI GPT Chat Model Backend?
The Chat GPT models backend allows you to send queries directly to OpenAI and get a response.  To do so you must first have an API key.
To get an API key for accessing OpenAI's large language models, first create an account by visiting OpenAI's signup page and completing
the registration. Once logged in, go to the API keys page, click "Create new secret key," optionally name it, and then click
"Create secret key" to generate it. Make sure to copy and store the key securely, as it won't be shown again. To activate the key,
visit the Billing section and add your payment details. For a more detailed walkthrough, you can refer to this step-by-step tutorial.

### What local chat models are available and how can I use them?
The "local models" option within the Query Database Tab downloads chat models directly from Huggingface and requires no external program.
You can select a local model from the pulldown menu and when you use it for the first time it will automatically download the model and
it can then be used thereafter for subsequent queries.  Please note that certain models are "gated," which means that you must first
enter a huggingface access token.  You can create an access token on Huggingface's website and then enter it within the "File" menu
within this program in the upper left. You must do this before trying to use certain "gated" "local models".  To get a Huggingface
access token you must create a huggingface account and then go to your profile.  On the left-hand side will be an "Access Tokens"
option.  Then in the upper right is a "Create new token" button.  Check the box that says "Read access to contents of all public
gated repos you can access" then click "Create token."

### How do I get a huggingface access token?
Some chat models in this program are "gated" and require a Huggingface access token.  If a model is gated and you haven't provided an
access token this program will notify you.  To obtain an access token you must create a huggingface account and then go to your profile.
On the left-hand side will be an "Access Tokens" option.  Once clicked, in the upper right is a "Create new token" button.  Check the
box that says "Read access to contents of all public gated repos you can access" then click "Create token."  You can then enter the
access token in this program by going to the "File" menu and selecting "Huggingface Access Token."  You can subsequently change your
access token within this program by repeating the same steps.

### What is a context limit or maximum sequence length?
The phrase "context limit" refers to the maximum number of tokens that a model can handle at once.  With chat model the phrase
"context limit" is usually used and with embedding models it is customary to use the phrase "maximum sequence length."  Regardless,
it refers to the same thing.  When you choose a chunk size in this program it is important to make sure that the chunk size does not
exceed the maximum sequence length of the embedding model.  You can see each model's limit in the Models Tab.  Remember, these limits
are given in tokens wherease the chunk size setting is in characters.  This is because the text extraction and splitting operates in
terms of characters.  On average, one token is three to four character so you will need to do some rough math when setting the chunk
size setting to make sure that it does not exceed the embedding model's maximum sequence length.

### What happens if I exceed the maximum sequence length of an embedding model?
If the chunks you create will exceed the embedding model's maximum sequence length they will be truncated, leading to suboptimal search
results.  In other words, if a chunk is too long the end will be cut off before the embeddings are created in order to ensure that
the chunk is less than the maximum sequence length.  This obviously leads to suboptimal search results because some meaning is lost.
You can check the maximum sequence length for all embedding models that this program uses by inspecting the model within the Models Tab.
It is very important that you know the maximum sequence length before using an embedding model.

### How many contexts should I retrieve when querying the vector database?
For simple question-answer use cases, 3-6 chunks should suffice. For a typical book, a chunk size of 1200 characters with an
overlap of 600 characters can return up to 6 contexts. Advanced embedding models are often capable of retrieving the most relevant
context in the first or second result.  If you are not getting relevant results in the first three to six results then you desperately
need to revise your queries because the issue is not with the number of contexts being returned.  The type of query and how your phrase
it can be even more important than the actual number of chunks returned.  With that said, there are use cases for returning a lot of
chunks as well for more complex scenarios, especially now that a lot of chat models have extended context limits.  To give one example,
let's say that you embed a lot of court cases and then ask a question of "What are the exceptions to the hearsay rule of evidence?"
It might be reasonable to request 20-30 contexts, which are then fed to the chat model for a synthesized response.

### What does the chunks only checkbox do?
Typically when you submit a query within the Query Database Tab it connects to your chosen backend to get a response from a chat model.
However, if you check the "chunks only" checkbox it will only return the chunks retrieved from the vector database.  This is good
for seeing verbatim what would be sent to the chat model backend in case you need that level of detail, but the primary purpose is to
enable users to see the quality of the chunks that they are creating.  For example, it gives you an idea of whether the chunks size
setting you chose is sufficient, or it gives users an idea of whether a particular embedding model is creating a high enough quality
of embeddings for their particular use case.

### What are embedding or vector models?
Embedding models, which are sometimes referred to as vector models, are large language models specifically trained to convert a
chunk of text into a number that represents the meaning of that number.  This number, referred to as an "embedding" or "vector" can
then be entered into database to be searched for similar vectors.

### Which embedding or vector model should I choose?
There are several considerations when choosing which embedding model to use, which are important to understand because it can take
significant time and compute resources to create a vector database.  First, the size of the embedding model and how much VRAM it uses
is a factor.  In general, the large and more compute resources required for a model, the higher quality embeddings that it will produce.
Also, the maximum sequence lengh of the model can be a factor.  Most embedding models have traditionally had a 512 token limit but
modern models now have limits of 8192 tokens or even higher.  Thirdly, some embedding models are trained on specific languages like
English while others are multilingual.  All of these characteristics can be viewed within the Models Tab as well as the hyperlinks
on the Models Tab to repository for each model so you can read more about each model.

### What are the dimensions of a vector or embedding model?
The dimensions of a vector model refers to the level of detail of the embeddings that an embedding model will create.  The more
dimensions means a greater level of detail and higher quality embedding, but will require more time and computer resources to create.
Technically speaking, the number of dimensions refers to the size of the array of numbers that is the "embedding," which, as
described previously, represents the semantic meaning of a chunk of text.  For example, the array of numbers might have 384 numbers,
because the embedding model has 384 dimensions.

### What are some general tips for choosing an embedding model?
Try to use as high of a quality of an embedding model as your system resources will allow.  Although there are exceptions for newer
embedding models, embedding models typically do not use as much VRAM as typical chat models, so the real limitation when choosing
an embedding model is how much compute time you are willing to spend before the vector database is create.  It is highly recommented
to choose as high a quality of embedding model as possible.  Also, if compute resources are limited make sure and check the "half"
checkbox within the Settings Tab.  This will run the embedding model in either bfloat16 or float16 (commonly referred to as half
precision).  Studies show that there is very little loss in quality between full precision and half precision.  Lastly, always use
"cuda" within the Settings Tab when creating embeddings if you have a GPU.

### What Are Vision Models?
Vision models are a category of large language models trained to understand what is in an image.  For purposes of this program,
they are used to understand what's in an image, generate a summary for an image, which can then be put into the vector database.
This program allows you to choose from multiple vision models within the Settings Tab.  Before you take a lot of time to process
a lot of images it is highly recommended that you test the various vision models within the Tools Tab to find one that suits you.

### What vision models are available in this program?
The vision models that you can use in this program can be seen within the Settings Tab in the pulldown menu where you select the
vision model you want to use.  Each of these vision models can be researched on the huggingface website if you need more details.
Also, you can Ask Jeeves for more information about a specific family of models.  In general, the visions models are arranged within
this pulldown menu from smallest at top to largest at the bottom.  The larger the model generally means the higher quality results you
will get, but not always.  Smaller vision models that are newer sometimes outperform larger but older vision models.  Also, some
vision models excel at certain types of images over other types. The best strategy to choose an appropriate vision models before
committing to processing a large number of images is to go to the Tools Tab and test the various vision models.  You can Ask Jeeves
for details of how to do this.

### Do you have any tips for choosing a vision model?
When choosing a vision model it is recommended to choose the highest quality model that your system can run taking into consideration
the amount of compute time you are willing to spend.  Each vision model requires a certain amount of VRAM to use, which is typically
much higher than embedding models.  It is highly recommended to test all the models on a single image, which you can do within the
Tools Tab, or if you already know your VRAM limitations, only test the vision models you know you have the resources to run.  The
Tools Tab allows you to test a particular vision model on multiple images or multiple visions models on a single image.  Either way
it's important to get a feel for the vision models' quality and compute resources required before committing to procesdsing a lot
of images that will be put into a vector database.

### What is whisper and how does this program use voice recording or transcribing an audio file?
Whisper is an advanced speech recognition model developed by OpenAI that transcribes audio into text. This program uses whisper models
in two ways.  First, to allow users to record their voice into the question box when querying the vector database.  This can be done
within the Query Database Tab; simply click the "Voice Recorder" button, record your question, and it will be output to the query box.
Secondly, whisper models are used to create transcriptions of audio files that can subsequently be entered into a vector database.
You can create these transcriptions within the Tools Tab.  This will create a transcript of an audio file, which you will see within
the Create Database Tab before creating the vector database.

### How can I record my question for the vector database query?
To transcribe a spoken question, go to the "Query Database" tab, click the "Voice Recorder" button to begin recording
and then speak clearly. Click the button again to stop recording, and the transcribed text will appear in the question box.

### How can I transcribe an audio file to be put into the vector database?
To transcribe an audio file, navigate to the Tools tab, select an audio file (most file formats are supported such as .mp3, .wav,
.m4a, .ogg, .wma, and .flac) and click the Transcribe button. After the transcription is complete you can see it in the
"Create Database" tab and it will be entered into the vector database when you create it.  The transcribing functionality uses
the powerful `WhisperS2T` library with the `Ctranslate2` backend.  Make sure to adjust the "Batch" setting when transcribing an
audio file depending on the size of the whisper model you choose. Increasing the batch size can improve speed but demands more
VRAM, so care should be taken not to exceed your GPU’s capacity.

### What are the distil variants of the whisper models when transcribing and audio file?
Distil variants of Whisper models use approximately 70% of the resources of their full counterparts and are faster with very little
loss in quality.

### What whisper model should I choose to transcribe a file?
When transcribing an audio file in order to put it into a vector database it is generally recommended to use as high a quality of
a whisper model as your hardware will support.  The quality of a whisper model is determined by a few factors.  Firstly, its size
is the most important factor - e.g. large versus medium versus small.  Secondly, the precision of the model that you use.  This
program allows you to choose float32 for the highest qualityy or bfloat16 or float16 (i.e. half precision).  In general, using
half precision results in about 95% of the quality of float32 for half the compute resources needed.  Lastly, some of the whisper
models come in "distil" variants that have certain layers of the model removed.  Again, this typically gives approximately 95%
of the non-distil variant for half the compute resources.  It is highly recommended to test the various whisper models on a small
audio file first before committing to transcribing a large audio file, which can be done within the Tools Tab.

### What are floating point formats, precision, and quantization?
Understanding floating point formats is key when making decisions about model selection and quantization. Floating point formats
represent real numbers in binary using a combination of sign, exponent, and fraction (mantissa) bits. The sign bit indicates whether
the number is positive or negative. The exponent bits determine the range or magnitude of the value. The fraction or mantissa bits
control the precision of the value.

### What are the common floating point formats?
float32 32-bit floating point with 1 sign bit 8 exponent bits and 23 fraction bits this format provides high precision and a wide
range making it a standard choice for many computing tasks float16 16-bit floating point comprising 1 sign bit 5 exponent bits
and 10 fraction bits float16 offers reduced precision and range but uses less memory and computational power bfloat16 brain floating
point this format features 1 sign bit 8 exponent bits and 7 fraction bits it has the same range as float32 but with lower precision
making it particularly useful for deep learning applications range and precision comparison format float32 approximate range plus
or minus 1.4 times 10 to the minus 45 to plus or minus 3.4 times 10 to the 38 precision in decimal digits 6 to 9 format float16
approximate range plus or minus 6.1 times 10 to the minus 5 to plus or minus 6.5 times 10 to the 4 precision in decimal digits 3 to 4
format bfloat16 approximate range plus or minus 1.2 times 10 to the minus 38 to plus or minus 3.4 times 10 to the 38 precision in
decimal digits 2 to 3

### What are precision and range regarding floating point formats and which should I use?
The choice of floating point format has several key implications precision affects the detail and accuracy of computations range
determines the scale of values that can be represented trade-offs arise when opting for lower precision formats as they reduce
memory usage and increase processing speed but may slightly reduce accuracy

### What is Quantization?
Quantization reduces the precision of the numbers used to represent a model's parameters which results in smaller models and lower
computational requirements the main goals of quantization are to improve model speed reduce memory usage ram or vram and enable models
to run on resource-constrained hardware there are two main methods of quantization post-training quantization is applied after the
model is trained quantization-aware training incorporates quantization during the training process to minimize accuracy loss common
quantization levels include int8 8-bit integer which significantly reduces model size but may introduce quantization errors and
float16 or bfloat16 which reduces size with minimal impact on accuracy

### What are the aspects or effects of quantization?
model size reduction smaller data types take up less storage performance increase reduced data size speeds up computation potential
accuracy loss reduced precision may introduce errors though often negligible for many applications

## What settings are available in this program and how can I adjust them?
The "Settings" Tab contains most of the settings for LM Studio, querying the database, creating the database, the text to speech
functionality, and the vision models.  Please ask me a question about the specific setting or group of settings you're interested in?

### What are the LM Studio Server settings?
When using LM Studio as the chat model backend you can adjust a few settings from within the Settings Tab.  In general, however,
the LM Studio program has all the settings that you should adjust.  For purposes of this program you can adjust the port to match
what you set within LM Studio.  Also, there is a checkbox you can check to see the thinking process if the model you are running
within LM Studio has chain of thought.

### What are the database creation settings?
The Device setting allows you to choose either CPU or CUDA when creating a vector database.  It is always recommended to choose
CUDA if available.  The Chunk Size setting determines the size of the chunks of text that your documents will be broken into before
being turned into embeddings.  It is crucial to remember that this setting is in number of characters, not tokens, and that you must
keep the chunks within the maximum sequence length of the embedding model you are using, as expressed in tokens, and which you can
see within the Models Tab.  Remember, each tokens is approximately 3-4 characters.  The Overlap setting refers to how many characters
at the beginning of a chunk are from the preceding chunk.  When a document is processed sometimes it is split in the middle of an
important concept and this setting ensures that there is an overlap to avoid losing meaning.  A good rule of thumb is to set the
Overlap setting to 30-49 percent of the Chunk Size setting.  The half-precision setting, if checked, will run the embedding model
in half precision resulting in a slight reduction in quality but half the compute resources.

### What are the database query settings?
Within the Settings Tab you can adjust several settings when searching a vector database.  The Device setting allows you to choose
between CPU and CUDA.  In contrast to creating a vector database, it is recommended to always use CPU.  The Similarity setting sets
a threshhold of relevance for a chunk of text before it will be returned as a result.  You can set a value between zero and 1.  A
higher value will result in more chunks being returned but you should never use 1.  The Contexts setting determines the maximum
number of chunks that will be returned, again, subject to the Similarity setting.  The Search Term Filter will require that any chunks
returned include the specified term.  The File Type setting allows you to only search for chunks of text that originated from a
particular file type.

### How does the Contexts setting work exactly?
Within the Settings Tab the Contexts setting when searching a vector database will return up to that many chunks of text assuming they
all meet the Similarity setting that you choose.  In other words, it sets the upper limit.  If there are not that many chunks that also
meet the Similarity setting it is possible to receive fewer chunks than the Contexts setting.

### What is the similarity setting?
Within the Settings Tab the Similarity setting controls the requisite relevance of a chunk related to your query in order for it to
possibly be returned.  I say "possibly" because even though a chunk might meet the Similarity setting it might not be returned if, for
example, your Contexts setting limits the numbe of chunks that will be returned.  By defaut, this program will return chunks in order
from highest relevance to lowest.  It will return the most relevant chunks that meet the Similarity setting up to the maximum
number of chunks specified in the Contexts setting.  A higher Similarity setting means that more chunks will possibly be returned.
A good default value is .8, but do not go above 1.

### What is the search term filter setting?
Within the Settings Tab the Search Term Filter setting allows you to require that any chunks returned contain the specified search term.
It is not case-sensitive, but it does require an exact match.  For example, if you specify “child” it will only return chunks that
include the term "child" somewhere in it.  This would not include chunks that have the word "children" in it, however, since it
requires a verbatim match.  With that said, since it is not case-sensitive it would also include chunks with "Child" in them.  This
setting is especially useful when you know that a relevant chunk has a certain key word in it; otherwise, it is best to leave this blank.
Click the Clear Filter button to clear any filters.  Lastly, it is important to understand that this setting only applies after both
the Similarity and Contexts settings.  Therefore, if you set those settings too low you might not receive any chunks with your specified
search term.

### What is the File Type setting?
Within the Settings Tabe the File Type setting allows you to limit the chunks that are returned based on whether they originated from
a particular type of file.  Current options include images, documents, audio or all files.  It is best to use the all files option
unless you are sure that the chunks you are looking from originated from a particular type of file.

### What are text to speech models (aks TTS models) and how are they used in this program?
Text to speech models (TTS) are large language models that were specifically trained to take text as input and output audio in a spoken
voice format.  This program allows you to use TTS models to speak the response that you get after querying the vector database.

### What text to speech models are availble in this program to use?
You can choose various text to speech models within the Settings Tab.  The current options are Bark, WhisperSpeech, ChatTTS, and
Google TTS.  The Bark backend has a Normal size that produces slightly higher quality and and a Small version that uses fewer
resources.  With Bark you can choose different speaker voices such as v2/en_speaker_6, which is usually considered the highest
quality or v2/en_speaker_9, which is the only female voice.  Using Bark requires a GPU, however. The WhisperSpeech backend consists
of two models that you choose within the Settings Tab, both of which determine the quality.  Experiment with both to find a setting
that works with your hardware.  WhisperSpeech, like Bark, requires a GPU but is generally less compute intensive than Bark at roughly
the same quality.  The ChatTTS backend is also a good option that can be run both on GPU or CPU.  It produces audio slightly less
quality than Bark or WhisperSpeech.  Lastly, the Google TTS backend is the least compute intensive.  However, it does not require a
GPU and will instead connect to a free online Google service that provides TTS.

### What is the Bark text to speech?
Bark TTS by Suno AI is a fully generative, open-source text-to-audio model that produces highly expressive and realistic speech,
even capable of non-verbal vocalizations like laughter or sighs. Unlike traditional TTS systems that strictly follow input text,
Bark can "freestyle," deviating for prosodic expressiveness or ambient cues, which makes it especially useful for creative
applications like character dialogue, storytelling, and game development. It supports over 100 built-in speaker presets and
auto-detects more than a dozen languages, although English remains the most polished. Bark uses EnCodec and a GPT-style transformer
under the hood, trading speed for quality, and typically requires GPU acceleration. Despite its occasional unpredictability, its
rich emotional output and open MIT license make it a standout for experimental and expressive use cases.

### What is the WhisperSpeech text to speech?
WhisperSpeech by Collabora is a cutting-edge open-source project that "reverses" OpenAI's Whisper speech-to-text model to synthesize
speech from semantic audio tokens, offering an exciting glimpse into the future of modular, multilingual TTS. Inspired by Google’s
SPEAR-TTS, WhisperSpeech leverages Whisper’s deep linguistic understanding and language-neutral token representations to build a
multilingual, speaker-aware system that supports voice cloning and polyglot speech (e.g. the same voice speaking in multiple languages).
Though still under heavy development, early results show surprisingly natural and expressive audio, particularly given the open
model’s small size. It’s not yet plug-and-play like Bark or ChatTTS, but its transparency, voice customization potential, and strong
multilingual foundation make it a compelling choice for developers interested in training their own flexible, high-quality TTS pipeline.

### What is the ChatTTS text to speech?
ChatTTS is an open-source conversational TTS model specifically designed for dialogue generation, with a focus on natural prosody,
expressive timing, and multi-speaker interactions. Trained on over 100,000 hours of English and Chinese speech, it delivers highly
realistic and emotionally resonant voices tailored for chatbots and AI companions. Unlike many TTS engines, ChatTTS includes
conversational structure like speaker turns and can even insert interjections like laughter using special tokens. While it lacks a
large preset voice library like Bark, it can produce distinct speakers and supports fine-tuning on custom data. It runs efficiently
on consumer GPUs and offers Python bindings, making it one of the most practical and expressive TTS options for developers aiming to
build natural, back-and-forth conversational agents in English or Mandarin.

### What is the Google TTS text to speech?
Google TTS (Free Tier) offers industry-leading neural speech synthesis via a cloud API, backed by WaveNet and Neural2 models that
produce ultra-clear, stable, and emotionally nuanced voices across 380+ voices and 50+ languages. Although not open-source, it
provides a generous free tier (up to 4 million characters/month for standard voices and 1 million for WaveNet), making it highly
accessible for small-scale use. Developers can fine-tune pronunciation and pacing using SSML, and even select expressive “Newscaster”
or “Lively” voice styles. With near real-time performance via a fast cloud API and seamless Python integration, Google TTS is the gold
standard for high-quality, multilingual TTS — ideal for production-ready applications where speech quality, reliability, and global
language support outweigh the need for open-source control.

### What is the Chatterbox text to speech?
Chatterbox, developed by Resemble AI, is a cutting-edge open-source text-to-speech (TTS) model that sets a new standard in voice
synthesis. Released under the permissive MIT license, it offers developers and creators unparalleled freedom to use, modify, and
distribute the software. Chatterbox's standout features include zero-shot voice cloning, allowing it to mimic any voice with just a
few seconds of reference audio and emotion exaggeration control. Its alignment-informed inference ensures ultra-stable and
natural-sounding speech, making it ideal for real-time applications like voice assistants and interactive media. In blind evaluations,
Chatterbox has been consistently preferred over proprietary models like ElevenLabs, highlighting its superior performance in
generating high-quality, expressive speech. With its combination of advanced features, open-source accessibility, and exceptional
speech synthesis quality, Chatterbox stands out as a powerful tool for developers seeking a versatile and ethical TTS solution.

### Which text to speech backend or models should I use
Generally it's recommended to experiment with each to your liking.  However, in general Bark and WhisperSpeech produce the highest
quality results, Chat TTS and Chatterbox are below them but can be run on GPU as well as CPU, and Google TTS is comparable to
Chat TTS in terms of quality but requires an Internet connection.

### Can I back up or restore my databases and are they backed up automatically
When you create a vector database it is automatically backed up.  However, if you want to manually backup all databases you can go
to the "Tools" tab and click the Backup All Databases button.  Likewise, you can restore all backed up databases within the Tools Tab.

### What happens if I lose a configuration file and can I restore it?
This program cannot function without the config.yaml file if you lose it accidentally or it gets corrupted for some reason you can
restore a default version by if necessary copy the original configyaml from the assets folder to the main directory delete old files
and folders in vector_db and vector_db_backup to prevent conflicts

### What are some good tips for searching a vector database?
To improve your search results when searching a vector database it is important to understand the relationship between the various
settings within the Settings Tab.  When a vector database is searched it will first identify candidate chunks to return that meet the
Similarity setting.  Once it does that it will return the most relevant chunks up to the limit of the number of chunks that you set
with the Contexts setting.  After that, it will apply the Search Term Filter setting to remove any chunks that do not contain the
verbatim search term (remember, this is case-insensitive howver).  After that, these chunks are what are then sent to the chat model
along with your initial query to get a response.

### General VRAM Considerations
To conserve VRAM, disconnect secondary monitors from the GPU and, if available, use motherboard graphics ports instead. This requires
enabling integrated graphics in the BIOS, which is often disabled by default when a dedicated GPU is installed. This can be
particularly useful if your CPU has integrated graphics, such as Intel CPUs without an "F" suffix, which support motherboard
graphics ports.

### How can I manage vram?
For optimal performance, ensure that the entire LLM is loaded into VRAM. If only part of the model is loaded, performance can be
significantly degraded. It’s also important to manage VRAM efficiently by ejecting unused models when creating the vector database
and reloading the LLM after the database creation is complete. When querying the vector database, using the CPU instead of the GPU
is recommended to conserve VRAM for the LLM, as querying is less resource-intensive and can be effectively handled by the CPU.

### What are the speed and VRAM requirements for the various chat models?
You can always check the VRAM and speed for local models within the Tools Tab by clicking the "Chat Models" button, which will display
a nice chart.  However, in general smaller models like Qwen 3 - 0.6b deliver exceptional speed at over 200 characters per second while
requiring minimal VRAM (1.3GB), mid-range models in the 2-9 billion parameter range offer a sweet spot for most users, with speeds
ranging from 150-400 characters per second and VRAM usage between 2.5-9.5GB. Notable standouts include the GLM4-Z1 - 9b, which achieves
an impressive 395 CPS while using under 10GB VRAM, and the Exaone models, which consistently deliver faster performance than
similarly-sized alternatives. For users with high-end GPUs, the larger 24-32 billion parameter models provide enhanced reasoning
capabilities at the cost of reduced speed (95-140 CPS) and substantial VRAM requirements (15-20GB).

### What are the speed and VRAM requirements for the various vision models?
Vision models demonstrate a clear inverse relationship between speed and model size, with smaller models delivering significantly
faster image processing while larger models provide enhanced accuracy at the cost of reduced throughput. The fastest performers are
models like Ovis2 - 2b at 312 characters per second (CPS) and InternVL2.5 - 1b (289 CPS) with relatively low VRAM usage of 2.3-5.8GB.
Florence-2 models, which can be run on a CPU, showcase interesting trade-offs.  For example, Florence-2-Base achieves an impressive
971 CPS on GPU with only 2.6GB VRAM, CPU-only operations drops performance to 157 CPS. Mid-range models like
Granite Vision - 2b (218 CPS, 4.1GB) and THUDM glm4v - 9b (201 CPS, 9.8GB) offer balanced performance for most use cases. The
largest models such as Qwen VL - 7b (174 CPS, 9.6GB) require more resources.

### What are maximunm context length and maximum sequence length and how to they relate?
Each embedding model has a maximum sequence length, and exceeding this limit can result in truncation. To avoid this, regularly
check the maximum sequence length of the model and adjust your settings accordingly. Reducing chunk size or the number of contexts
can help stay within these limits. Maximum "context length" refers to chat models and is very similar to maximum sequence length.
The key thing to understand is that the chunks you put into the vector database should be within the max sequence length of the
vector or embedding model you choose and the maximum context or chunks you retrieve from the vector database multiplied by their
length should stay within the chat model's context length limit.  And make sure to leave enough context for a response.

### What is the scrape documentaton feature?
Within the Tools tab you can select multiple python libraries and scrape their documentation.  Multiple .html files will be downloaded
and you can subsequently create a vector database out of them.  Larger more complex libraries can take a significant amount of time
to scrape to make sure you have a stable Internet connection.

### Which vector or embedding models are available in this program?
All of the embedding models that this program uses are listed on the Models Tab.  You can click on a hyperlink for each one to find
out more information.  The embedding models sometimes change as different versions of this program are released and newer and better
embedding models are released.  This program vets all embedding models, however, before including them for usage.

### What is the manage databaes tab?
The Manage Databases Tab allows you to see all of the vector databases that you have created thus far and what documents are in them.
Select the database you want to view from the pulldown menu and you can see the files that have been embedded.  Also, you can
doubleclick any of the files to open it in your system's default program.  When a vector database is created the location of the
original file is saved as metadata.  As long as you haven't moved the original file on your computer, this metadata will be used to
locate the file and open it in the default program on your system.

### How can I create a vector database?
Go to the Create Database tab and choose the files that you want to add to the vector database.  If you select any file types that are
not supported, the program will let you know and give you an option to automatically exclude them.  Remember, you can repeat this
process as many times as you with.  Also, you can choose whether to select all of the files in a particular directory or simply
choose individual files.  To add audio transcriptions to the database you must first transcribe audio files individually, which can
only be done within the Tools Tab.  To input descriptions of images into the vector database choose an appropriate vision model from
the Settings Tab.  Any images you select will then automatically be processed by that vision model when you create the database.
Remember, make sure and adjust the database creation settings within the Settings Tab before creating the database.

### Can I use images and audio files in my database?
You can use both images and audio in your vector database. Images: When you add image files (like PNG, JPG, BMP), the selected vision
model creates a text description of each image, which is then embedded like a regular text document. For example, a chart might be
described as “A line graph showing revenue over time with an upward trend.” You can then search with queries like “What does the
revenue trend look like?” and retrieve the image. Make sure you choose a vision model in the Settings Tab first and use the Test
Vision Models tool within the Tools Tab ot preview captions before using a particular model. Audio: You can't add audio files directly,
but you can use the Transcribe File tool (powered by OpenAI’s Whisper model) to convert audio to text. This transcript can then be
added like any other document during database creation. If you try to upload audio directly, the program will prompt you to transcribe
it first. By converting images and audio to text, the system supports rich, multi-modal queries — as long as content is processed
correctly.

### What chat models are available with the local models option?
Within the Query Database Tab if you choose the local models option it will allow you to use a specified number of chat models that
will be downloaded directly from the Huggingface website.  All of these models have been specifically chosen for their strength
in question answering using contexts provided by a vector database.  Please ask about a particular family of chat models for more
information or you can visit the repository for the various chat models on Huggingface for more detailed information.  The available
chat models that this program uses sometimes changes as newer models come out with higher capabilities.  All chat models that are
added or removed will be noted in the release notes on Github for the record.

### What are the Qwen 3 Chat Models?
Qwen3 is the latest release in the Qwen family of large language models.  they come in six sizes ranging from .6 billion parameters
to 32 billion gparameters and can be used under the Apache 2.0 license.  A key innovation with the Qwen3 series is the hybrid
"thinking" versus "non-thinking" modes that are available.  This program has opted to use the thinking mode for all Qwen3 models as
it tends to produce the best results for retrieval augmented generation purposes.  The Qwen3 models are multilinguals and are touted
as supporting up to 119 languages.  They were trained on approximately 36 trillion tokens, which is double the amount used for Qwen 2.5.
Qwen has consistently created some of the best open source and free models available and they are a staple of this program.

### What are the Granite 3.3 Chat Models?
The Granite 3.3 chat models are the latest in the Granite series developed by IBM and are released under the Apache 2.0 license.
They are "thinking" or "reasoning" models and have improved upon prior iterations in this regard.  The Granite models were trained
on synthetically generated datasets for long-context tasks and are good for retrieval augmented generation purposes.  Version 3.3
of the models exceed the performance of Granite 3.1 and 3.2 by a significant margin.

### What are the GLM-Z1 Chat Models?
The Z1 family of chat models are created by THUDM and demonstract strong performance across a wide range of tasks, including retrieval
augmented generation.  The benchmarks show that they are particularly strong in general-purpose question answering across a wide range
of domains - e.g. science, math, and other areas.  They come in a 9 billion parameter and 32 billion parameter variants and are a
staple of this program due to their high quality on question answering tasks.

### What is the Mistral Small Chat Model?
The Mistral Small chat model is the third iteration of Mistral models and has 24 billion parameters.  It is released under the
Apache 2.0 license for liberal usage.  Compared to larger models such as LLaMA 3.3 with 70 billion parameters and Qwen 2.5 with
32 billion parameters, the Mistral Small 3 model achieves comparable quality results across a wide range of benchmarks.  What is
unique about the Mistral Small 3 model is its size of 24 billion parameters, which often sits in the sweet spot for VRAM usage for
users having 24 gigabytes of VRAM.  Sometimes larger models having 32 billion parameters will exceed the available VRAM with longer
contexts but Mistral Small 3 leaves sufficient VRAM avaialble in such circumstances. Benchmark results also show that it excels at
reasoning, coding, math, and instruction following, oftentimes producing more succinct answers than other similarly sized models.

### What are the MiniCPM-4 chat models?
The MiniCPM-4 chat models are ultra-efficient, open-source LLMs built by the OpenBMB team for edge devices, offered in lightweight
0.5 B-parameter and 8 B-parameter versions.  The 8b variant achieves comparable performance to Qwen3-8B using only 22% of the training
data.  The 0.5B parameter variant, despite having fewer parameters, significantly outperforms Qwen3-0.6B, Llama3.2-1B, and Gemma3-1B.
The 8b variant matches Qwen3 8b and outperforms GLM4-9B, and exceeds larger models such as Gemma3-12B and approaches Phi4-14B.  The
major advantage is on 128k context sequences where it achieves 7x faster decoding than Qwen3-8b due to the fact that it only attends
to ~6k tokens out of 128k (5% sparsity).  These chat models focus on the English and Chinese languages.

### What is the Deepseek R1 Chat Model?
The Deepseek R1 chat model was previously removed from this program, but it has been re-added since Deepseek released a newer and
improved version in late May, 2025.  This new version, based on the Qwen3 architecture, has significantly improved both its reasoning
and generic response tasks and is an excellent choice for retrieval augmented generation.  It claims to rival much larger open source
models such as Qwen 3 32b and Phi-4 14b and even claims to outperform closed-source models such as OpenAI's os-mini (medium) and
Google's Gemini 2.5 Flash, which is quite impressive.

### What are the BGE Embedding Models?
The BGE family of embedding models were created by BAAI and have long been a staple within the embedding community and this program
in particular.  They are well-respected as producing high quality embeddings for reasonable compute resources.  Although they are
over a year old now, they are still regarded as producing quality embeddings for a reasonable compute cost for most use cases.  At
the time of their release they were state of the art for open source and free embedding models.

### What are the Granite Embedding Models?
The Granite family of embedding models were created by IBM and are lightweight embedding models based on the RoBERTa architecture as
opposed to the BERT architecture like most other embedding models.  IBM touts these models as being suitable for "enterprise" use
cases and come in 30.3 and 125 million parameter sizes.  Along with the Snowflake Arctic embedding models, they are one of the fastest
embedding models that this program offers when considered in relation to the quality of embeddings that they produce.  In contrast to
the Snowflake Arctic embedding models, however, they do not rely upon the Xformers library to achieve this, which is not supported by
all graphics cards.  The Granite embedding models were released in early 2025 under the liberal Apache-2.0 license.  This program only
usese the English-trained variations of the models.

### What are the Intfloat Embedding Models?
Similar to the BGE embedding models produced by BAAI, the Intfloat embedding models have long been a staple of high quality embedding
models in the community and this program.  They include "small," "base," and "large' variants for your particular use case.  They offer
high quality embeddings for the compute resources required and often go head-to-head in comparison with the "bge" models from BAAI.
Although they are well over a year old now they still offer high quality embeddings for a reasonable compute cost and many other
embedding models have been built upon the e5 family of models.

### What are the Arctic Embedding Models?
Snowflake's Arctic-embed models are retrieval-optimized text embedding models built on E5-small and E5-large embedding models created
by Intfloat. Despite their relatively modest sizes, these models outperformed larger competitors on several benchmarks.  They are
also significantly faster than similarly sized models due to their reliance on the Xformers library.  These models can, however, be
run with or without reliance on the Xformers library depending on whether a user's hardware supports it.  The Snowflake Arctic embedding
models are also unique in that they have a maximum sequence length of 8192 tokens, which is far greater than the typical 512 token limit
of other embedding models.

### What are the Qwen3 Embedding Models?
Released in June, 2025, Alibaba’s Qwen 3 Embedding family delivers state-of-the-art text embeddings while staying friendly to everyday hardware.  They are based on the popular Qwen 3 chat models but have special training to make them suitable for generating embeddings.
As of June, 2025, they hold the top three ranked spots on the Huggingface leaderboard.  They are primarily trained on English and
Chinese data, but a fair amount of their training data is also from numerous other languages so they can be reliably used for multilingual
embedding tasks as well.  They are released under the liberal Apache-2.0 license. The Qwen 3 family of embedding models comes in three
practical sizes—“small” (0.6 B parameters), “base” (4 B), and “large” (8 B). Even the 0.6 B version outperforms older 7 B embedding models, which is a phenomenal accomplishment while the 8 B model often edges out commercial offerings. All variants support long contexts (up to 32 k tokens). 

### What is the Scrape Documentation tool?
Scrape Documentation automatically downloads documentation from online sources to build vector databases without manual copy-pasting.
Located in the Tools tab, simply select a documentation source from the dropdown menu (many common libraries are pre-configured) and
click "Scrape." The program will fetch all relevant pages, showing progress as it works. Scraped content is stored in
src/Scraped_Documentation/<NameOfDoc>/. Once complete, you'll need to add these files to a vector database through the Create Database
tab - the scraper only retrieves and saves the docs but doesn't vectorize them.  If documentation has been previously scraped, the
entry appears in red, and you'll be warned before overwriting existing data. This feature is particularly useful for creating
searchable knowledge bases from official documentation for technical Q&A using the VectorDB-Plugin.

### How do I test vision models on images?
The Test Vision Models tool in the Tools tab lets you preview how vision models describe your images before adding them to a database.
It offers two main options: (1) Multiple Files + One Vision Model, which tests one vision model on multiple images. First, select
image files in the Create Database tab, then choose your vision model in Settings. Return to Tools and click "Multiple Files + One
Vision Model – Process." The tool generates descriptions for all images without creating a database, showing average description
length to help you evaluate the model's performance.  Single Image + All Vision Models: Compare multiple vision models on one image.
Click this option, select an image, then choose which vision models to test from the dialog (they're listed with VRAM requirements).
The tool will sequentially process your image through each model and produce a comparison showing each model's description and
processing time. This helps you balance quality versus speed when selecting a vision model.

### What is Optical Character Recognition?
Optical character recognition (aka OCR) refers to whether a .pdf file has a text layer embedded within it representing the actual text
in the document.  The exact structure of the .pdf file format in general is beyond the scope of this tutorial, but generally a .pdf
will have a "glyph" layer that contains the visual representations of text as we commonly understand them being in different "fonts" or
other representations and styles.  The "text layer" refers to a text representation of these common glyphs that a .pdf may or may not
have, which is unseen but which is ultimately extracted when text is extracted from a .pdf document.  If a .pdf does not have this text
layer then text cannot be extracted from a .pdf unless OCR has been done on it, which you can do with this program.  To do so, go to
the Tool Tab, select a .pdf, and perform OCR.  You can Ask Jeeves for more details regarding this if need be.

### How can I extract text from PDFs or images with OCR?
The OCR tool, found in the Tools tab, converts image-based documents into searchable text using the built-in Tesseract engine. To use it:
(1) Go to the "OPTICAL CHARACTER RECOGNITION" section in the Tools tab.
(2) Ensure "Tesseract" is selected from the dropdown (it’s usually pre-selected).
(3) Click "Choose PDF" to upload your scanned PDF or image file.
(4) Click "Process" to start extracting text.
Once processing is complete, the tool generates two outputs:
(1) A new PDF file with an "_OCR" suffix that includes the original document along with an invisible, searchable text layer.
(2) A plain text file containing all the recognized text, including page markers like [[page1]].
You can then upload either the OCR-enhanced PDF or the plain text file to your vector database using the Create Database tab. The
tool works best with PDFs, including multi-page ones, but it also supports image files. OCR accuracy varies depending on the clarity
and quality of the input, so it's important to review the results carefully when accuracy is critical.

### What other features does the Misc tab have?
In addition to backup and restore, the Misc tab includes three visualization tools: GPU Comparison Chart: Click the "GPUs" button to
open a chart that compares graphics cards based on performance and memory. You can filter results by VRAM range (e.g., 4–6 GB, 8 GB,
10–12 GB), making it easier to evaluate which GPUs are suitable for running various models. Chat Models Comparison: Selecting
"Chat Models" brings up a chart comparing local chat models, displaying estimated VRAM usage and token generation speeds. Models are
typically color-coded by category (e.g., general use vs. coding), giving you a clear picture of which ones align with your GPU
capabilities. Vision Models Comparison: Clicking "Vision Models" launches a comparison of available vision captioning models,
highlighting their size, VRAM requirements, and performance benchmarks such as processing time per image. All visualizations open
in separate windows using matplotlib. These tools are purely informational, aimed at helping users make informed choices about
model compatibility and system requirements. To return to the application, simply close the chart window.

### What is Ask Jeeves and how do I use it?
Ask Jeeves is an integrated help assistant built into the VectorDB-Plugin, designed to serve as an in-app guide or Q&A tool. You can
access it from the menu bar—look for the "Ask Jeeves" option. When launched, it opens a new window where you can type in questions
about using the program. For instance, you might ask, “How do I add a PDF to my database?” or “What does chunk overlap mean?” Ask
Jeeves will respond with helpful answers sourced from the documentation. Ask Jeeves is ideal for getting quick guidance while
actively using the program, without needing to leave the interface or consult external resources. If the feature doesn’t respond
or appears broken, users are encouraged to report the issue on GitHub, as it may indicate a problem with loading the help content.
Think of Ask Jeeves as your on-demand tutor—just click it, type a plain-English question about the VectorDB-Plugin, and get clear
explanations or step-by-step instructions. And yes, the name is a playful reference to the classic “Ask Jeeves” search engine,
suggesting you can ask it anything!

### What are the InternVL3 Vision Models?
InternVL3, released in April 2025, is an advanced open-source multimodal LLM series trained natively on interleaved text, image,
and video data. It follows a ViT-MLP-LLM architecture with vision encoders up to 6B parameters and integrates with LLMs like
InternLM 3 and Qwen2.5. A major innovation is Variable Visual Position Encoding (V2PE), which enhances long-context visual
reasoning by using finer positional increments for visual tokens. The model employs Native Multimodal  re-Training, combining
language and vision learning in one stage, improving performance without separate alignment stages. InternVL3 also introduces
Mixed Preference Optimization and uses dynamic image tiling, JPEG compression, and over 300K instruction-following samples for
training. A Visual Process Reward Model improves inference via best-of-N reasoning chains. Empirically, InternVL3 achieves top
scores across benchmarks like MMMU, MathVista, and OCRBench, outperforming previous models at all scales. It extends capabilities
beyond traditional multimodal reasoning to tool use, 3D perception, GUI interaction, and industrial analysis.

### What are the Ovis2 Vision Models?
Ovis2 launched in January 2025 as a second-generation multimodal large language model optimized for compact sizes (1B and 2B). It
integrates Apple’s AIMv2 vision transformer and supports Qwen2.5 or InternLM 2.5 as its language backend. A key innovation is its
visual embedding table, which structurally aligns image patches with textual tokens using a shared embedding strategy, improving
coherence across modalities. Unlike traditional connector-based MLLMs, Ovis2 maps visual inputs into probabilistic tokens that
interact with a large visual vocabulary (131,072 visual words), allowing for sparse, efficient visual representation. The model is
instruction-tuned on diverse multimodal data, including videos, multilingual OCR, and charts, boosting chain-of-thought reasoning.
Though not trained with quantization, 4-bit GPTQ versions were made available in March 2025. Ovis2 achieves state-of-the-art results
across various benchmarks, including 89.1 on OCRBench and 83.6 on MMBench-V1.1 for the 8B version. Overall, Ovis2’s architectural
advancements enable high performance on vision-language tasks while maintaining efficiency in smaller model sizes.

### What are the Florence-2 Vision Models?
Florence-2, released by Microsoft in June 2024, comes in two sizes—Base (232M parameters) and Large (771M)—and uses a sequence-to-sequence
architecture built on DaViT and Transformer layers. The model is trained on FLD-5B, a dataset with 5.4 billion annotations across
126 million images, created by the automated Florence data engine. Florence-2 integrates visual inputs with textual prompts and excels
in zero-shot tasks, outperforming much larger models like Flamingo-80B on benchmarks such as COCO captioning and TextVQA. It performs
well across multiple levels of granularity, from full images to specific regions and pixels, enabling state-of-the-art performance in
various tasks. Its design allows for multitask learning without the need for separate modules, improving efficiency and simplifying
deployment. Fine-tuning on public datasets further boosts its accuracy and robustness in real-world applications. Unlike traditional
dual-encoder models like CLIP, Florence-2 uses a single Transformer stack with joint vision-text training, accepting both images and
text prompts as input and producing outputs in text or structured formats.

### What are the Granite Vision Models?
Granite Vision is IBM's enterprise-focused vision-language model optimized for visual document understanding released in February 2025.
It has around 3 billion parameters and uses a SigLIP vision encoder, a two-layer GELU-activated MLP connector, and the
granite-3.1-2b-instruct language model. Trained on 13 million images and 80 million instructions using public and synthetic data.
Granite Vision excels at layout parsing, text recognition, and UI analysis, especially for charts and tables, achieving up to 95%
accuracy in chart extraction. It matches or surpasses models like Phi3.5v and InternVL2 on document benchmarks such as DocVQA, ChartQA,
and TextVQA. Unique features include sparse attention-based safety mechanisms and multi-layer feature extraction. The model, based on
the LLaVA architecture, is open-source under the Apache 2.0 license and supports commercial use. Granite Vision consistently outperforms
or matches Phi3.5v and InternVL2 across key benchmarks, highlighting its strong advantage in document-focused vision-language tasks.

### What are the Qwen2.5VL Vision Models?
Qwen2.5-VL is the latest vision-language model in the Qwen family. It excels in visual understanding tasks like object recognition,
text and chart analysis, and document parsing. The model features a streamlined ViT-based vision encoder with window attention,
SwiGLU activations, RMSNorm, and dynamic resolution/frame rate training for video, enhanced by mRoPE in the time dimension. These
architectural updates allow precise visual localization and robust multimodal reasoning. Qwen2.5-VL-7B outperforms peers like
InternVL2.5-8B, MiniCPM-o 2.6, and GPT-4o-mini in multiple benchmarks: Document QA: DocVQA 95.7%, InfoVQA 82.6%, ChartQA 87.3%
Text recognition: TextVQA 84.9%, OCRBench 864, CC_OCR 77.8% General VLU: MMBench 82.6%, MMVet 67.1% Math reasoning: MathVista 68.2%,
MathVision 25.07% It also resists hallucination better than GPT-4o-mini (HallBench: 52.9% vs. 46.1%). The model integrates tightly
with the Qwen2.5 LLM, sharing its tokenizer and text processing, while extending it with specialized vision-language handling and
support for flexible image resolutions.

### What is the GLM-4V-9B Vision Model?
GLM-4V-9B, developed by Zhipu AI and Tsinghua University, is a 9B-parameter bilingual (Chinese/English) multimodal model released
in mid-2024 as part of the GLM (OpenGLM) series. It integrates vision into the pretrained GLM-4 LLM, supporting high-resolution
inputs up to 1120×1120 and enabling general vision-language tasks like image QA, captioning, and reasoning. The model uses standard
attention and likely linear patch embeddings, with training on large multilingual image-text datasets. GLM-4V-9B incorporates Mixed
Preference Optimization (MPO) to enhance chain-of-thought alignment, similar to InternVL. It supports FP16 precision and an 8K context
window, though quantization is not emphasized. Benchmarks show strong performance: it scored 81.1 on English MMBench and 786 on
OCRBench, outperforming many open models and reportedly rivaling or exceeding GPT-4-turbo and Gemini 1.0 Pro on several vision tasks.

### What is the Molmo-D-0924 Vision Model?
Molmo-D-0924 is a 7–8B parameter open-source vision-language model released by the Allen Institute (AI2) in September 2024, as part
of the larger Molmo project. It combines Qwen2-7B as the language backbone with OpenAI’s CLIP-ViT as the vision encoder and is trained
on a proprietary PixMo dataset of 1M high-quality image–text pairs. A key innovation is its support for multi-turn “pointing” in images
via a special OLMo module, allowing the model to interactively highlight regions in response to queries—moving beyond standard text-only
outputs. The model is decoder-only, optimized for interactive use, and runs efficiently on commodity GPUs with FP16 or bfloat16
precision. While users can’t fine-tune quality knobs beyond image size, it offers real-time responsiveness. On benchmarks, Molmo-7B-D
performs between GPT-4V and GPT-4o and achieves state-of-the-art results among similarly sized open models, as confirmed by academic
and human evaluations.

================================================
FILE: Assets/vision_model_table.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Vision Model Table</title>
    <style>
        body {
            background-color: #161b22;
            color: #f9f9f9;
            font-family: Arial, sans-serif;
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 0 auto; /* Center table */
        }
        th, td {
            border: 1px solid #f9f9f9; /* Text color for border */
            text-align: center;
            padding: 8px;
        }
        th {
            background-color: #004D40;
            color: black; /* For contrast against light background */
        }
        .main-header {
            background-color: #004D40;
        }
		a {
		  color: #00bf9e; /* Change this to your desired color */
		}
		a:visited {
		  color: #00bf9e; /* Color for visited links */
	
	00bf9e	


    </style>
</head>
<body>
<style>
  table {
    font-size: 16px;
    line-height: .75;
  }
</style>
<table>
    <tr>
        <th rowspan="2">Model Name</th>
        <th colspan="2" class="main-header">GPU Metrics</th>
        <th colspan="2" class="main-header"># Characters in Summary</th>
    </tr>
    <tr>
        <th>Characters/s</th>
        <th>Memory</th>
        <th>Low</th>
        <th>High</th>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/OpenGVLab/InternVL2_5-1B">InternVL2.5 - 1b</a></td>
        <td>291.18</td>
        <td>2.4 GB</td>
        <td>TBD</td>
        <td>TBD</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/microsoft/Florence-2-base">Florence2-Base - 223m</a></td>
        <td>766.49</td>
        <td>2.6 GB</td>
        <td>350</td>
        <td>660</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/OpenGVLab/InternVL2_5-4B">InternVL2.5 - 4b</a></td>
        <td>173.57</td>
        <td>3.1 GB</td>
        <td>TBD</td>
        <td>TBD</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/vikhyatk/moondream2">Moondream2 - 2b</a></td>
        <td>344.97</td>
        <td>4.5 GB</td>
        <td>299</td>
        <td>644</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/microsoft/Florence-2-large">Florence2-Large - 772m</a></td>
        <td>564.86</td>
        <td>5.3 GB</td>
        <td>450</td>
        <td>650</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/h2oai/h2ovl-mississippi-2b">Mississippi - 2b</a></td>
        <td>320.00</td>
        <td>5.3 GB</td>
        <td>TBD</td>
        <td>TBD</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/AIDC-AI/Ovis1.6-Llama3.2-3B">Ovis1.6-Llama3.2 - 3b</a></td>
        <td>321.79</td>
        <td>9.6 GB</td>
        <td>TBD</td>
        <td>TBD</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/THUDM/glm-4v-9b">GLM-4v - 9b</a></td>
        <td>140.65</td>
        <td>10.4 GB</td>
        <td>TBD</td>
        <td>TBD</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf">llava 1.6 vicuna - 13b</a></td>
        <td>120.98</td>
        <td>11.2 GB</td>
        <td>501</td>
        <td>1045</td>
    </tr>
    <tr>
        <td><a href="https://huggingface.co/ctranslate2-4you/molmo-7B-O-bnb-4bit">Molmo-D-0924 - 8b</a></td>
        <td>146.60</td>
        <td>12.4 GB</td>
        <td>TBD</td>
        <td>TBD</td>
    </tr>
</table>
</body>
</html>

================================================
FILE: CSS/template.css
================================================
DocQA_GUI {
  background-color: $bg_window;
}
QWidget {
  border: none;
}
QPushButton {
  background-color: $bg_control;
  color: $text_primary;
  font: 10pt "Segoe UI Historic";
  border-radius: 5px;
  padding: 5px;
  min-width: 60px;
  border: 1px solid transparent;
}
QPushButton:hover {
  background-color: $bg_control_hover;
  border: 1px solid $border_focus;
  color: $text_primary;
}
QLabel {
  color: $text_primary;
}
QComboBox {
  background-color: $bg_control;
  color: $text_primary;
  border: 1px solid $bg_window;
  border-radius: 5px;
  padding: 3px;
}
QComboBox:hover,
QComboBox:focus {
  background-color: $bg_control_hover;
  color: $text_primary;
  border: 1px solid $bg_window;
}
QComboBox QAbstractItemView {
  background-color: $bg_surface;
  color: $text_primary;
  border: 1px solid $bg_window;
  border-radius: 5px;
}
QComboBox QAbstractItemView::item:hover {
  background-color: $bg_list_hover;
  color: $text_primary;
}
QLineEdit {
  background-color: $bg_window;
  color: $text_input;
  border: 1px solid transparent;
  border-radius: 5px;
  padding: 3px;
}
QLineEdit:hover,
QLineEdit:focus {
  border: 1px solid $border_focus;
}
QLineEdit::placeholder {
  color: $text_placeholder;
}
QRadioButton {
  color: $text_primary;
}
QGroupBox {
  border: 1px solid $bg_surface;
  border-radius: 5px;
  color: $text_primary;
  font-size: 12pt;
  padding: 10px;
}
DownloadModelDialog {
  background-color: $bg_window;
}
QFrame {
  background-color: $bg_window;
}
QTextEdit[readOnly="true"] {
  background-color: $bg_surface;
  color: $text_primary;
  border: 1px solid $bg_control;
  border-radius: 5px;
  selection-background-color: $selection_bg;
  selection-color: $selection_fg;
  font: 14pt "Segoe UI Historic";
}
QTextEdit[readOnly="false"] {
  background-color: $bg_surface;
  color: $text_primary;
  border: 1px solid $bg_control;
  border-radius: 5px;
  selection-background-color: $selection_bg;
  selection-color: $selection_fg;
  font: 14pt "Segoe UI Historic";
}
QTabWidget {
  background-color: $bg_window;
  border: none;
}
QTabWidget, QTabWidget::pane {
  margin: 0px;
  padding: 0px;
  border: none;
}
QTabBar::tab {
  background-color: $bg_tab;
  color: $text_primary;
  border-bottom-left-radius: 3px;
  border-bottom-right-radius: 3px;
  margin: 3px;
  padding: 5px 5px;
}
QTabBar::tab:selected {
  background-color: $bg_tab_selected;
  border-bottom: 3px solid $border_focus;
}
QTabBar::tab:hover {
  background-co

Download .txt

gitextract_gve6w__6/

├── .gitignore
├── Assets/
│   ├── SentenceTransformer.py
│   ├── core.py
│   ├── user_manual_consolidated.md
│   └── vision_model_table.html
├── CSS/
│   └── template.css
├── README.md
├── Tokenizer/
│   ├── special_tokens_map.json
│   ├── tokenizer.json
│   ├── tokenizer.model
│   └── tokenizer_config.json
├── __main__.py
├── charts/
│   ├── __init__.py
│   ├── all_gpus.py
│   ├── gpu_info.py
│   ├── models_chat.py
│   ├── models_vector.py
│   └── models_vision.py
├── chat/
│   ├── __init__.py
│   ├── base.py
│   ├── jeeves.py
│   ├── kobold.py
│   ├── lm_studio.py
│   ├── local_model.py
│   ├── minimax.py
│   └── openai.py
├── core/
│   ├── __init__.py
│   ├── chatgpt_settings.py
│   ├── config.py
│   ├── constants.py
│   ├── extract_metadata.py
│   ├── initialize.py
│   └── utilities.py
├── db/
│   ├── __init__.py
│   ├── choose_documents.py
│   ├── create_symlinks.py
│   ├── cuda_manager.py
│   ├── database_interactions.py
│   ├── document_processor.py
│   ├── embedding_models.py
│   ├── process_manager.py
│   ├── sqlite_operations.py
│   ├── stage_extract.py
│   ├── stage_split.py
│   └── stage_tokenize.py
├── gui/
│   ├── __init__.py
│   ├── credentials.py
│   ├── dialogs/
│   │   ├── __init__.py
│   │   ├── ai_backends_dialog.py
│   │   ├── chatgpt_tab.py
│   │   ├── kobold_tab.py
│   │   ├── lm_studio_tab.py
│   │   └── minimax_tab.py
│   ├── download_model.py
│   ├── main_window.py
│   ├── metrics_bar.py
│   ├── tabs.py
│   ├── tabs_databases/
│   │   ├── __init__.py
│   │   ├── create.py
│   │   ├── manage.py
│   │   └── query.py
│   ├── tabs_models/
│   │   ├── __init__.py
│   │   └── models.py
│   ├── tabs_settings/
│   │   ├── __init__.py
│   │   ├── database_create.py
│   │   ├── database_query.py
│   │   ├── settings.py
│   │   ├── tts.py
│   │   └── vision.py
│   └── tabs_tools/
│       ├── __init__.py
│       ├── misc.py
│       ├── ocr.py
│       ├── scrape.py
│       ├── tools.py
│       ├── transcribe.py
│       └── vision.py
├── gui.py
├── modules/
│   ├── __init__.py
│   ├── kokoro.py
│   ├── ocr.py
│   ├── process_images.py
│   ├── scraper.py
│   ├── transcribe.py
│   ├── tts.py
│   └── voice_recorder.py
├── setup_windows.py
└── tools/
    ├── __init__.py
    ├── check_packages.py
    ├── chunk_userguide.py
    └── replace_sourcecode.py

Download .txt

SYMBOL INDEX (1108 symbols across 67 files)

FILE: Assets/SentenceTransformer.py
  class SentenceTransformer (line 61) | class SentenceTransformer(nn.Sequential, FitMixin, PeftAdapterMixin):
    method __init__ (line 166) | def __init__(
    method get_backend (line 390) | def get_backend(self) -> Literal["torch", "onnx", "openvino"]:
    method encode (line 400) | def encode(
    method encode (line 419) | def encode(
    method encode (line 438) | def encode(
    method encode (line 456) | def encode(
    method encode (line 474) | def encode(
    method encode (line 492) | def encode(
    method encode (line 510) | def encode(
    method encode (line 526) | def encode(
    method forward (line 790) | def forward(self, input: dict[str, Tensor], **kwargs) -> dict[str, Ten...
    method similarity_fn_name (line 801) | def similarity_fn_name(self) -> Literal["cosine", "dot", "euclidean", ...
    method similarity_fn_name (line 818) | def similarity_fn_name(
    method similarity (line 830) | def similarity(self, embeddings1: Tensor, embeddings2: Tensor) -> Tens...
    method similarity (line 833) | def similarity(self, embeddings1: npt.NDArray[np.float32], embeddings2...
    method similarity (line 836) | def similarity(self) -> Callable[[Tensor | npt.NDArray[np.float32], Te...
    method similarity_pairwise (line 880) | def similarity_pairwise(self, embeddings1: Tensor, embeddings2: Tensor...
    method similarity_pairwise (line 883) | def similarity_pairwise(
    method similarity_pairwise (line 888) | def similarity_pairwise(
    method start_multi_process_pool (line 926) | def start_multi_process_pool(
    method stop_multi_process_pool (line 976) | def stop_multi_process_pool(pool: dict[Literal["input", "output", "pro...
    method encode_multi_process (line 996) | def encode_multi_process(
    method _encode_multi_process_worker (line 1095) | def _encode_multi_process_worker(
    method set_pooling_include_prompt (line 1122) | def set_pooling_include_prompt(self, include_prompt: bool) -> None:
    method get_max_seq_length (line 1140) | def get_max_seq_length(self) -> int | None:
    method tokenize (line 1152) | def tokenize(self, texts: list[str] | list[dict] | list[tuple[str, str...
    method get_sentence_features (line 1166) | def get_sentence_features(self, *features) -> dict[Literal["sentence_e...
    method get_sentence_embedding_dimension (line 1169) | def get_sentence_embedding_dimension(self) -> int | None:
    method truncate_sentence_embeddings (line 1189) | def truncate_sentence_embeddings(self, truncate_dim: int | None) -> It...
    method _first_module (line 1218) | def _first_module(self) -> torch.nn.Module:
    method _last_module (line 1222) | def _last_module(self) -> torch.nn.Module:
    method save (line 1226) | def save(
    method save_pretrained (line 1317) | def save_pretrained(
    method _create_model_card (line 1345) | def _create_model_card(
    method save_to_hub (line 1390) | def save_to_hub(
    method push_to_hub (line 1454) | def push_to_hub(
    method _text_length (line 1589) | def _text_length(self, text: str | list[int] | list[list[int]]) -> int:
    method evaluate (line 1606) | def evaluate(self, evaluator: SentenceEvaluator, output_path: str = No...
    method _load_auto_model (line 1621) | def _load_auto_model(
    method _load_module_class_from_ref (line 1677) | def _load_module_class_from_ref(
    method _load_sbert_model (line 1705) | def _load_sbert_model(
    method load (line 1898) | def load(input_path) -> SentenceTransformer:
    method device (line 1902) | def device(self) -> device:
    method tokenizer (line 1927) | def tokenizer(self) -> Any:
    method tokenizer (line 1934) | def tokenizer(self, value) -> None:
    method max_seq_length (line 1941) | def max_seq_length(self) -> int:
    method max_seq_length (line 1960) | def max_seq_length(self, value) -> None:
    method _target_device (line 1967) | def _target_device(self) -> torch.device:
    method _target_device (line 1974) | def _target_device(self, device: int | str | torch.device | None = Non...
    method _no_split_modules (line 1978) | def _no_split_modules(self) -> list[str]:
    method _keys_to_ignore_on_save (line 1985) | def _keys_to_ignore_on_save(self) -> list[str]:
    method gradient_checkpointing_enable (line 1991) | def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=...

FILE: Assets/core.py
  class Chat (line 34) | class Chat:
    method __init__ (line 35) | def __init__(self, logger=logging.getLogger(__name__)):
    method has_loaded (line 52) | def has_loaded(self, use_decoder=False):
    method download_models (line 68) | def download_models(
    method load (line 166) | def load(
    method unload (line 202) | def unload(self):
    method sample_random_speaker (line 213) | def sample_random_speaker(self) -> str:
    method sample_audio_speaker (line 216) | def sample_audio_speaker(self, wav: Union[np.ndarray, torch.Tensor]) -...
    class RefineTextParams (line 220) | class RefineTextParams:
    class InferCodeParams (line 233) | class InferCodeParams(RefineTextParams):
    method infer (line 245) | def infer(
    method interrupt (line 309) | def interrupt(self):
    method _load (line 313) | def _load(
    method _infer (line 423) | def _infer(
    method _vocos_decode (line 543) | def _vocos_decode(self, spec: torch.Tensor) -> np.ndarray:
    method _decode_to_wavs (line 550) | def _decode_to_wavs(
    method _infer_code (line 579) | def _infer_code(
    method _refine_text (line 702) | def _refine_text(

FILE: charts/all_gpus.py
  function create_gpu_comparison_plot (line 6) | def create_gpu_comparison_plot(min_vram_threshold=6, max_vram_threshold=8):

FILE: charts/gpu_info.py
  class GPUInfo (line 5) | class GPUInfo(TypedDict):

FILE: charts/models_chat.py
  function create_chat_models_comparison_plot (line 9) | def create_chat_models_comparison_plot():

FILE: charts/models_vector.py
  function create_vector_models_comparison_plot (line 3) | def create_vector_models_comparison_plot():

FILE: charts/models_vision.py
  function create_vision_models_comparison_plot (line 6) | def create_vision_models_comparison_plot():

FILE: chat/base.py
  class ChatSignals (line 32) | class ChatSignals(QObject):
  function load_chat_config (line 38) | def load_chat_config():
  function save_metadata (line 42) | def save_metadata(metadata_list):
  function build_augmented_query (line 47) | def build_augmented_query(contexts, query):
  function cleanup_gpu (line 50) | def cleanup_gpu():
  function utf8_file_operations (line 56) | def utf8_file_operations():
  function _configure_device_settings (line 70) | def _configure_device_settings(settings, model_info):
  function get_max_length (line 92) | def get_max_length(model_name):
  function get_max_new_tokens (line 97) | def get_max_new_tokens(model_name):
  function get_generation_settings (line 102) | def get_generation_settings(max_length, max_new_tokens):
  function make_bnb_settings (line 114) | def make_bnb_settings(dtype):
  function get_hf_token (line 133) | def get_hf_token():
  function check_if_model_is_gated (line 142) | def check_if_model_is_gated(repo_id, hf_token):
  class _StopOnToken (line 158) | class _StopOnToken(StoppingCriteria):
    method __init__ (line 159) | def __init__(self, stop_ids):
    method __call__ (line 162) | def __call__(self, input_ids, scores, **kwargs):
  class BaseModel (line 167) | class BaseModel(ABC):
    method __init__ (line 168) | def __init__(self, model_info, settings, generation_settings, attn_imp...
    method get_model_name (line 224) | def get_model_name(self):
    method create_prompt (line 228) | def create_prompt(self, augmented_query):
    method create_inputs (line 231) | def create_inputs(self, prompt):
    method generate_response (line 241) | def generate_response(self, inputs, remove_token_type_ids=False):
    method cleanup (line 259) | def cleanup(self):
    method switch_model (line 267) | def switch_model(self, new_model_class):
    method free_torch_memory (line 272) | def free_torch_memory(model, tokenizer):
  class LiquidAI (line 279) | class LiquidAI(BaseModel):
    method __init__ (line 280) | def __init__(self, generation_settings, model_name):
    method create_prompt (line 302) | def create_prompt(self, augmented_query):
    method create_inputs (line 310) | def create_inputs(self, prompt):
    method generate_response (line 325) | def generate_response(self, inputs, remove_token_type_ids: bool = False):
  class Granite (line 351) | class Granite(BaseModel):
    method __init__ (line 352) | def __init__(self, generation_settings, model_name):
    method create_prompt (line 362) | def create_prompt(self, augmented_query):
  class Exaone (line 368) | class Exaone(BaseModel):
    method __init__ (line 369) | def __init__(self, generation_settings, model_name):
    method create_prompt (line 384) | def create_prompt(self, augmented_query):
  class Qwen (line 390) | class Qwen(BaseModel):
    method __init__ (line 391) | def __init__(self, generation_settings, model_name):
    method create_prompt (line 407) | def create_prompt(self, augmented_query):
  class Mistral_Small_24b (line 416) | class Mistral_Small_24b(BaseModel):
    method __init__ (line 417) | def __init__(self, generation_settings, model_name=None):
    method create_prompt (line 421) | def create_prompt(self, augmented_query):
  class DeepseekR1 (line 429) | class DeepseekR1(BaseModel):
    method __init__ (line 430) | def __init__(self, generation_settings: dict, model_name: str):
    method create_prompt (line 462) | def create_prompt(self, augmented_query: str) -> str:
    method generate_response (line 466) | def generate_response(self, inputs, remove_token_type_ids: bool = False):
  class GLM4Z1 (line 470) | class GLM4Z1(BaseModel):
    method __init__ (line 471) | def __init__(self, generation_settings: dict, model_name: str):
    method create_prompt (line 505) | def create_prompt(self, augmented_query: str) -> str:
    method generate_response (line 512) | def generate_response(self, inputs, remove_token_type_ids: bool = False):
  class SeedCoder (line 523) | class SeedCoder(BaseModel):
    method __init__ (line 524) | def __init__(self, generation_settings, model_name=None):
    method create_prompt (line 528) | def create_prompt(self, augmented_query):
    method generate_response (line 537) | def generate_response(self, inputs):
  class Phi4 (line 542) | class Phi4(BaseModel):
    method __init__ (line 543) | def __init__(self, generation_settings: dict, model_name: str):
    method create_prompt (line 557) | def create_prompt(self, augmented_query: str) -> str:
    method generate_response (line 564) | def generate_response(self, inputs, remove_token_type_ids: bool = False):
  function generate_response (line 615) | def generate_response(model_instance, augmented_query):
  function choose_model (line 621) | def choose_model(model_name):

FILE: chat/jeeves.py
  class GenerationWorker (line 43) | class GenerationWorker(QThread):
    method __init__ (line 48) | def __init__(self, generator, tokenizer, prompt, model_dir):
    method run (line 56) | def run(self):
    method stop (line 97) | def stop(self):
  class ChatWindow (line 100) | class ChatWindow(QMainWindow):
    method __init__ (line 101) | def __init__(self, parent=None):
    method _ensure_model (line 231) | def _ensure_model(self) -> None:
    method eject_model (line 237) | def eject_model(self):
    method toggle_speech (line 251) | def toggle_speech(self):
    method on_model_selected (line 257) | def on_model_selected(self, index):
    method on_model_downloaded (line 294) | def on_model_downloaded(self, model_name, model_type):
    method _load_model (line 304) | def _load_model(self):
    method showEvent (line 324) | def showEvent(self, event):
    method apply_dark_mode_settings (line 328) | def apply_dark_mode_settings(self):
    method build_prompt (line 349) | def build_prompt(self, user_message):
    method send_message (line 357) | def send_message(self):
    method update_response (line 402) | def update_response(self, token):
    method show_error (line 409) | def show_error(self, error_message):
    method on_generation_finished (line 413) | def on_generation_finished(self):
    method find_top_similar (line 422) | def find_top_similar(self, input_text, top_k=5):
    method debounce_update (line 440) | def debounce_update(self, text):
    method _delayed_update (line 444) | def _delayed_update(self):
    method update_suggestions (line 452) | def update_suggestions(self, suggestions):
    method clear_suggestions (line 461) | def clear_suggestions(self):
    method on_suggestion_clicked (line 466) | def on_suggestion_clicked(self):
    method speak_response (line 473) | def speak_response(self):
    method cancel_speech (line 514) | def cancel_speech(self):
    method on_speech_finished (line 518) | def on_speech_finished(self):
    method handle_tts_error (line 529) | def handle_tts_error(self, error_message):
    method closeEvent (line 534) | def closeEvent(self, event):
  class TTSWorker (line 543) | class TTSWorker(QObject):
    method __init__ (line 547) | def __init__(self, tts, text, voice, speed):
    method stop (line 555) | def stop(self):
    method run (line 560) | def run(self):
  function launch_jeeves_process (line 574) | def launch_jeeves_process():

FILE: chat/kobold.py
  class KoboldChat (line 12) | class KoboldChat:
    method __init__ (line 13) | def __init__(self):
    method connect_to_kobold (line 20) | def connect_to_kobold(self, augmented_query):
    method handle_response_and_cleanup (line 53) | def handle_response_and_cleanup(self, full_response, metadata_list):
    method ask_kobold (line 60) | def ask_kobold(self, query, selected_database):
  class KoboldThread (line 94) | class KoboldThread(QThread):
    method __init__ (line 100) | def __init__(self, query, selected_database):
    method run (line 109) | def run(self):
    method stop (line 118) | def stop(self):

FILE: chat/lm_studio.py
  function _strip_thinking (line 18) | def _strip_thinking(buffer, in_thinking):
  class LMStudioChat (line 59) | class LMStudioChat:
    method __init__ (line 60) | def __init__(self):
    method connect_to_local_chatgpt (line 65) | def connect_to_local_chatgpt(self, prompt):
    method handle_response_and_cleanup (line 115) | def handle_response_and_cleanup(self, full_response, metadata_list):
    method ask_local_chatgpt (line 122) | def ask_local_chatgpt(self, query, selected_database):
  class LMStudioChatThread (line 151) | class LMStudioChatThread(QThread):
    method __init__ (line 152) | def __init__(self, query, selected_database):
    method run (line 158) | def run(self):
  function is_lm_studio_available (line 167) | def is_lm_studio_available():

FILE: chat/local_model.py
  class MessageType (line 18) | class MessageType(Enum):
  class PipeMessage (line 29) | class PipeMessage:
  class LocalModelSignals (line 33) | class LocalModelSignals(QObject):
  class LocalModelChat (line 42) | class LocalModelChat:
    method __init__ (line 43) | def __init__(self):
    method start_model_process (line 49) | def start_model_process(self, model_name):
    method terminate_current_process (line 64) | def terminate_current_process(self):
    method start_chat (line 96) | def start_chat(self, user_question, selected_model, selected_database):
    method is_model_loaded (line 106) | def is_model_loaded(self):
    method eject_model (line 109) | def eject_model(self):
    method _start_listening_thread (line 112) | def _start_listening_thread(self):
    method _listen_for_response (line 128) | def _listen_for_response(self, stop_event):
    method cleanup_listener_resources (line 156) | def cleanup_listener_resources(self):
    method _local_model_process (line 162) | def _local_model_process(conn, model_name):
  function is_cuda_available (line 254) | def is_cuda_available():

FILE: chat/minimax.py
  class MiniMaxChat (line 16) | class MiniMaxChat:
    method __init__ (line 17) | def __init__(self, override_model: str = None):
    method connect_to_minimax (line 27) | def connect_to_minimax(self, augmented_query):
    method handle_response_and_cleanup (line 56) | def handle_response_and_cleanup(self, full_response, metadata_list):
    method ask_minimax (line 67) | def ask_minimax(self, query, selected_database):
  class MiniMaxThread (line 97) | class MiniMaxThread(QThread):
    method __init__ (line 103) | def __init__(self, query, selected_database, model_name: str = None):
    method on_response (line 114) | def on_response(self, text):
    method on_error (line 117) | def on_error(self, error):
    method on_finished (line 120) | def on_finished(self):
    method on_citations (line 123) | def on_citations(self, citations):
    method run (line 126) | def run(self):

FILE: chat/openai.py
  class ChatGPTChat (line 18) | class ChatGPTChat:
    method __init__ (line 19) | def __init__(self):
    method connect_to_chatgpt (line 27) | def connect_to_chatgpt(self, augmented_query):
    method handle_response_and_cleanup (line 72) | def handle_response_and_cleanup(self, full_response, metadata_list):
    method ask_chatgpt (line 83) | def ask_chatgpt(self, query, selected_database):
  class ChatGPTThread (line 113) | class ChatGPTThread(QThread):
    method __init__ (line 119) | def __init__(self, query, selected_database):
    method on_response (line 130) | def on_response(self, text):
    method on_error (line 133) | def on_error(self, error):
    method on_finished (line 136) | def on_finished(self):
    method on_citations (line 139) | def on_citations(self, citations):
    method run (line 142) | def run(self):

FILE: core/chatgpt_settings.py
  function get_display_name (line 27) | def get_display_name(model: str) -> str:
  function get_model_from_display_name (line 31) | def get_model_from_display_name(display_name: str) -> str:
  function get_model_pricing (line 38) | def get_model_pricing(model_name: str) -> tuple[float, float, float]:
  function supports_reasoning_effort (line 42) | def supports_reasoning_effort(model_name: str) -> bool:
  function supports_verbosity (line 49) | def supports_verbosity(model_name: str) -> bool:
  function migrate_legacy_model (line 54) | def migrate_legacy_model(model_name: str) -> str:

FILE: core/config.py
  class OpenAIConfig (line 9) | class OpenAIConfig(BaseModel):
  class MiniMaxConfig (line 15) | class MiniMaxConfig(BaseModel):
  class ServerConfig (line 20) | class ServerConfig(BaseModel):
    method validate_connection_str (line 27) | def validate_connection_str(cls, v: str) -> str:
  class DatabaseConfig (line 33) | class DatabaseConfig(BaseModel):
    method coerce_contexts (line 46) | def coerce_contexts(cls, v):
    method validate_overlap (line 53) | def validate_overlap(cls, v: int, info) -> int:
    method validate_pipeline_preset (line 60) | def validate_pipeline_preset(cls, v: str) -> str:
  class ComputeDeviceConfig (line 67) | class ComputeDeviceConfig(BaseModel):
    method validate_device (line 75) | def validate_device(cls, v: str, info) -> str:
  class DatabaseInfo (line 81) | class DatabaseInfo(BaseModel):
  class AppearanceConfig (line 87) | class AppearanceConfig(BaseModel):
  class PlatformInfo (line 91) | class PlatformInfo(BaseModel):
  class AppConfig (line 95) | class AppConfig(BaseSettings):
    method root_dir (line 119) | def root_dir(self) -> Path:
    method docs_dir (line 123) | def docs_dir(self) -> Path:
    method vector_db_dir (line 127) | def vector_db_dir(self) -> Path:
    method vector_db_backup_dir (line 131) | def vector_db_backup_dir(self) -> Path:
    method models_dir (line 135) | def models_dir(self) -> Path:
    method vector_models_dir (line 139) | def vector_models_dir(self) -> Path:
    method load (line 143) | def load(cls, path: Optional[Path] = None) -> "AppConfig":
    method save (line 166) | def save(self, path: Optional[Path] = None) -> None:
    method update_field (line 175) | def update_field(self, field_path: str, value: Any) -> None:
    method update_setting (line 184) | def update_setting(self, field_path: str, value: Any) -> tuple[bool, s...
    method add_database (line 193) | def add_database(self, name: str, model_path: str, chunk_size: int, ch...
    method remove_database (line 201) | def remove_database(self, name: str) -> None:
    method get_user_databases (line 206) | def get_user_databases(self) -> list[str]:
  function get_config (line 214) | def get_config() -> AppConfig:
  function reload_config (line 223) | def reload_config() -> AppConfig:

FILE: core/constants.py
  class CustomButtonStyles (line 2706) | class CustomButtonStyles:
    method _generate_button_style (line 2762) | def _generate_button_style(cls, color_values):

FILE: core/extract_metadata.py
  function compute_content_hash (line 8) | def compute_content_hash(content: str) -> str:
  function compute_file_hash (line 11) | def compute_file_hash(file_path):
  function extract_common_metadata (line 18) | def extract_common_metadata(file_path, content_hash=None):
  function extract_typed_metadata (line 45) | def extract_typed_metadata(file_path, document_type, content_hash=None):
  function add_pymupdf_page_metadata (line 50) | def add_pymupdf_page_metadata(doc: Document, chunk_size: int = 1200, chu...

FILE: core/initialize.py
  function get_compute_device_info (line 13) | def get_compute_device_info():
  function get_platform_info (line 24) | def get_platform_info():
  function get_supported_quantizations (line 27) | def get_supported_quantizations(device_type):
  function update_config_file (line 35) | def update_config_file(**system_info):
  function check_for_necessary_folders (line 61) | def check_for_necessary_folders():
  function restore_vector_db_backup (line 80) | def restore_vector_db_backup():
  function delete_chat_history (line 108) | def delete_chat_history():
  function main (line 113) | def main():

FILE: core/utilities.py
  function set_cuda_paths (line 25) | def set_cuda_paths():
  function check_backend_dependencies (line 57) | def check_backend_dependencies(backend_name: str, interactive: bool = Tr...
  function is_package_available (line 71) | def is_package_available(pkg_name: str) -> tuple[bool, str]:
  function verify_installation (line 84) | def verify_installation(package_name: str, expected_version: str) -> bool:
  function install_packages (line 92) | def install_packages(packages: list[tuple[str, str]], no_deps: bool = Tr...
  function check_and_install_dependencies (line 111) | def check_and_install_dependencies(required_packages: dict[str, str],
  function get_platform_info (line 134) | def get_platform_info():
  function get_python_version (line 144) | def get_python_version():
  function has_nvidia_gpu (line 157) | def has_nvidia_gpu():
  function gpu_summary (line 172) | def gpu_summary():
  function _needs_ocr_worker (line 228) | def _needs_ocr_worker(path: str) -> bool:
  function clean_triton_cache (line 241) | def clean_triton_cache():
  function check_pdfs_for_ocr (line 261) | def check_pdfs_for_ocr(script_dir):
  class DownloadSignals (line 321) | class DownloadSignals(QObject):
  class DownloadRunnable (line 326) | class DownloadRunnable(QRunnable):
    method __init__ (line 327) | def __init__(self, download_func, *args):
    method run (line 333) | def run(self):
  function download_with_threadpool (line 341) | def download_with_threadpool(download_func, *args, callback=None):
  function download_kokoro_tts (line 348) | def download_kokoro_tts():
  function normalize_chat_text (line 376) | def normalize_chat_text(text):
  function supports_flash_attention (line 443) | def supports_flash_attention():
  function check_cuda_re_triton (line 458) | def check_cuda_re_triton():
  function get_model_native_precision (line 486) | def get_model_native_precision(embedding_model_name, vector_models=None):
  function get_appropriate_dtype (line 505) | def get_appropriate_dtype(compute_device, use_half, model_native_precisi...
  function format_citations (line 573) | def format_citations(metadata_list):
  function list_theme_files (line 628) | def list_theme_files():
  function load_stylesheet (line 631) | def load_stylesheet(name):
  function ensure_theme_config (line 639) | def ensure_theme_config():
  function update_theme_in_config (line 661) | def update_theme_in_config(new_theme):
  function make_theme_changer (line 679) | def make_theme_changer(theme_name):
  function backup_database (line 685) | def backup_database(database_name=None):
  function open_file (line 720) | def open_file(file_path):
  function delete_file (line 731) | def delete_file(file_path):
  function check_preconditions_for_db_creation (line 737) | def check_preconditions_for_db_creation(script_dir, database_name, skip_...
  function my_cprint (line 789) | def my_cprint(*args, **kwargs):
  function has_bfloat16_support (line 795) | def has_bfloat16_support():
  function set_logging_level (line 809) | def set_logging_level():
  function prepare_long_path (line 848) | def prepare_long_path(base_path: str, filename: str) -> str:
  function normalize_text (line 858) | def normalize_text(text, preserve_whitespace=False):
  function get_embedding_batch_size (line 917) | def get_embedding_batch_size(model_name: str, compute_device: str) -> int:
  function get_embedding_dtype_and_batch (line 949) | def get_embedding_dtype_and_batch(
  function configure_logging (line 961) | def configure_logging(level: str = "INFO"):

FILE: db/choose_documents.py
  class SymlinkWorker (line 49) | class SymlinkWorker(QThread):
    method __init__ (line 53) | def __init__(self, source, target_dir, parent=None):
    method run (line 58) | def run(self):
  function choose_documents_directory (line 121) | def choose_documents_directory():
  function show_incompatible_files_dialog (line 268) | def show_incompatible_files_dialog(incompatible_files):
  function load_config (line 302) | def load_config():
  function select_embedding_model_directory (line 307) | def select_embedding_model_directory():
  function _get_main_window (line 323) | def _get_main_window():

FILE: db/create_symlinks.py
  function _points_to (line 7) | def _points_to(link_path: Path, source_path) -> bool:
  function _create_single_symlink (line 14) | def _create_single_symlink(args):
  function create_symlinks_parallel (line 36) | def create_symlinks_parallel(source: Union[str, Path, List[str], List[Pa...

FILE: db/cuda_manager.py
  class CUDAManager (line 9) | class CUDAManager:
    method __new__ (line 13) | def __new__(cls):
    method __init__ (line 21) | def __init__(self):
    method cuda_operation (line 29) | def cuda_operation(self):
    method safe_empty_cache (line 38) | def safe_empty_cache(self):
    method force_empty_cache (line 53) | def force_empty_cache(self):
  function get_cuda_manager (line 67) | def get_cuda_manager() -> CUDAManager:

FILE: db/database_interactions.py
  function _json_dumps (line 79) | def _json_dumps(obj) -> str:
  function _json_dumps (line 82) | def _json_dumps(obj) -> str:
  function _get_split_params (line 109) | def _get_split_params():
  function _run_subprocess_stage (line 118) | def _run_subprocess_stage(name, cmd, timeout=3600):
  function _run_extract_with_retry (line 147) | def _run_extract_with_retry(source_dir, output_pkl):
  function _run_split_with_retry (line 169) | def _run_split_with_retry(extracted_pkl, chunks_pkl, chunk_size, chunk_o...
  function _setup_tiledb_dlls (line 205) | def _setup_tiledb_dlls():
  function create_vector_db_in_process (line 239) | def create_vector_db_in_process(database_name):
  function process_chunks_only_query (line 272) | def process_chunks_only_query(database_name, query, result_queue):
  class CreateVectorDB (line 307) | class CreateVectorDB:
    method __init__ (line 308) | def __init__(self, database_name):
    method initialize_vector_model (line 314) | def initialize_vector_model(self, embedding_model_name, config_data):
    method _create_tiledb_array (line 323) | def _create_tiledb_array(self, texts, vectors_array, metadatas):
    method load_audio_documents (line 442) | def load_audio_documents(self, source_dir=None):
    method clear_docs_for_db_folder (line 462) | def clear_docs_for_db_folder(self):
    method run (line 471) | def run(self):
  function get_query_db (line 646) | def get_query_db(database_name: str) -> "QueryVectorDB":
  function clear_query_cache (line 663) | def clear_query_cache(database_name: Optional[str] = None) -> None:
  class QueryVectorDB (line 678) | class QueryVectorDB:
    method __init__ (line 679) | def __init__(self, selected_database: str):
    method load_configuration (line 714) | def load_configuration(self):
    method initialize_vector_model (line 722) | def initialize_vector_model(self):
    method search (line 734) | def search(self, query, k: Optional[int] = None, score_threshold: Opti...
    method cleanup (line 868) | def cleanup(self):
    method close (line 880) | def close(self):

FILE: db/document_processor.py
  function _get_ingest_params (line 25) | def _get_ingest_params():
  class Document (line 38) | class Document:
  function compute_content_hash (line 43) | def compute_content_hash(content: str) -> str:
  function compute_file_hash (line 47) | def compute_file_hash(file_path):
  function extract_document_metadata (line 55) | def extract_document_metadata(file_path, content_hash=None):
  function _load_pdf (line 74) | def _load_pdf(file_path: Path) -> Optional[str]:
  function _load_docx (line 84) | def _load_docx(file_path: Path) -> Optional[str]:
  function _load_txt (line 90) | def _load_txt(file_path: Path) -> Optional[str]:
  function _load_csv (line 102) | def _load_csv(file_path: Path) -> Optional[str]:
  function _load_html (line 117) | def _load_html(file_path: Path) -> Optional[str]:
  function _load_eml (line 130) | def _load_eml(file_path: Path) -> Optional[str]:
  function _load_msg (line 164) | def _load_msg(file_path: Path) -> Optional[str]:
  function _load_xls (line 177) | def _load_xls(file_path: Path) -> Optional[str]:
  function _load_xlsx (line 194) | def _load_xlsx(file_path: Path) -> Optional[str]:
  function _load_rtf (line 212) | def _load_rtf(file_path: Path) -> Optional[str]:
  function _load_md (line 227) | def _load_md(file_path: Path) -> Optional[str]:
  function load_single_document (line 256) | def load_single_document(file_path: Path) -> Optional[Document]:
  function _extraction_worker_batch (line 289) | def _extraction_worker_batch(file_paths):
  function load_documents (line 310) | def load_documents(source_dir: Path) -> list:
  class FixedSizeTextSplitter (line 366) | class FixedSizeTextSplitter:
    method __init__ (line 367) | def __init__(self, chunk_size: int, chunk_overlap: int = 0):
    method split_documents (line 371) | def split_documents(self, docs: List[Document]) -> List[Document]:
  function add_pymupdf_page_metadata (line 409) | def add_pymupdf_page_metadata(doc: Document, chunk_size: int = 1200, chu...
  function split_documents (line 478) | def split_documents(documents=None, text_documents_pdf=None, chunk_size=...

FILE: db/embedding_models.py
  function _get_tokenize_parallel_workers (line 40) | def _get_tokenize_parallel_workers():
  function _get_model_family (line 49) | def _get_model_family(model_path: str) -> str:
  function _get_prompt_for_family (line 58) | def _get_prompt_for_family(family: str, is_query: bool = False) -> str:
  function _normalize_text (line 66) | def _normalize_text(text: str) -> str:
  function _get_encode_batch_size (line 97) | def _get_encode_batch_size(device: str, model_path: str = "") -> int:
  function _run_subprocess_stage (line 120) | def _run_subprocess_stage(name, cmd, cwd, timeout=3600):
  function _run_tokenize_with_retry (line 149) | def _run_tokenize_with_retry(
  class DirectEmbeddingModel (line 287) | class DirectEmbeddingModel:
    method __init__ (line 288) | def __init__(
    method _initialize_model (line 308) | def _initialize_model(self):
    method _safe_encode (line 353) | def _safe_encode(self, texts: list) -> np.ndarray:
    method embed_documents (line 367) | def embed_documents(self, texts: list) -> np.ndarray:
    method embed_query (line 471) | def embed_query(self, text: str) -> list:
    method __del__ (line 483) | def __del__(self):
  function create_embedding_model (line 492) | def create_embedding_model(
  function load_embedding_model (line 534) | def load_embedding_model(

FILE: db/process_manager.py
  class ProcessManager (line 9) | class ProcessManager:
    method __new__ (line 21) | def __new__(cls):
    method __init__ (line 29) | def __init__(self):
    method register (line 36) | def register(self, process: multiprocessing.Process):
    method unregister (line 41) | def unregister(self, process: multiprocessing.Process):
    method cleanup_one (line 47) | def cleanup_one(self, process: multiprocessing.Process, timeout: float...
    method cleanup_all (line 73) | def cleanup_all(self, timeout: float = 5.0):
    method get_active_count (line 86) | def get_active_count(self) -> int:
  function get_process_manager (line 94) | def get_process_manager() -> ProcessManager:

FILE: db/sqlite_operations.py
  function create_metadata_db (line 5) | def create_metadata_db(persist_directory, documents, hash_id_mappings):

FILE: db/stage_extract.py
  function main (line 18) | def main():

FILE: db/stage_split.py
  function save_checkpoint (line 103) | def save_checkpoint(checkpoint_path, data):
  function run_worker (line 117) | def run_worker(python_exe: str, worker_script_path: str,
  function get_physical_core_count (line 151) | def get_physical_core_count() -> int:
  function run_worker_with_retries (line 163) | def run_worker_with_retries(worker_id: int, total_workers: int,
  function main (line 247) | def main():

FILE: db/stage_tokenize.py
  function save_checkpoint (line 174) | def save_checkpoint(checkpoint_path, data):
  function run_worker (line 188) | def run_worker(python_exe: str, worker_script_path: str,
  function get_physical_core_count (line 224) | def get_physical_core_count() -> int:
  function run_worker_with_retries (line 236) | def run_worker_with_retries(worker_id, total_workers, python_exe, worker...
  function main (line 320) | def main():

FILE: gui/credentials.py
  class CredentialManager (line 12) | class CredentialManager(ABC):
    method __init__ (line 13) | def __init__(self, parent_widget):
    method _load_config (line 18) | def _load_config(self) -> dict:
    method _save_config (line 24) | def _save_config(self) -> None:
    method dialog_title (line 30) | def dialog_title(self) -> str:
    method dialog_label (line 35) | def dialog_label(self) -> str:
    method clear_button_text (line 40) | def clear_button_text(self) -> str:
    method credential_name (line 45) | def credential_name(self) -> str:
    method get_current_credential (line 49) | def get_current_credential(self) -> Optional[str]:
    method update_credential (line 53) | def update_credential(self, value: Optional[str]) -> None:
    method show_dialog (line 56) | def show_dialog(self) -> None:
  class HuggingFaceCredentialManager (line 106) | class HuggingFaceCredentialManager(CredentialManager):
    method dialog_title (line 108) | def dialog_title(self) -> str:
    method dialog_label (line 112) | def dialog_label(self) -> str:
    method clear_button_text (line 116) | def clear_button_text(self) -> str:
    method credential_name (line 120) | def credential_name(self) -> str:
    method get_current_credential (line 123) | def get_current_credential(self) -> Optional[str]:
    method update_credential (line 126) | def update_credential(self, value: Optional[str]) -> None:
  class MiniMaxCredentialManager (line 129) | class MiniMaxCredentialManager(CredentialManager):
    method dialog_title (line 131) | def dialog_title(self) -> str:
    method dialog_label (line 135) | def dialog_label(self) -> str:
    method clear_button_text (line 139) | def clear_button_text(self) -> str:
    method credential_name (line 143) | def credential_name(self) -> str:
    method get_current_credential (line 146) | def get_current_credential(self) -> Optional[str]:
    method update_credential (line 149) | def update_credential(self, value: Optional[str]) -> None:
  function manage_credentials (line 154) | def manage_credentials(parent_widget, credential_type: str) -> None:

FILE: gui/dialogs/ai_backends_dialog.py
  class AIBackendsDialog (line 20) | class AIBackendsDialog(QDialog):
    method __init__ (line 28) | def __init__(self, parent=None, initial_tab=0):
    method _load_config (line 61) | def _load_config(self) -> dict:
    method _save_config (line 71) | def _save_config(self, config: dict) -> bool:
    method _on_accept (line 80) | def _on_accept(self) -> None:

FILE: gui/dialogs/chatgpt_tab.py
  class CostPanel (line 28) | class CostPanel(QFrame):
    method __init__ (line 29) | def __init__(self, parent=None):
    method _build_cost_column (line 62) | def _build_cost_column(self, parent_layout: QHBoxLayout, label_text: s...
    method update_for_model (line 74) | def update_for_model(self, model_name: str) -> None:
  class ChatGPTTab (line 93) | class ChatGPTTab(QWidget):
    method __init__ (line 94) | def __init__(self, parent=None):
    method _toggle_api_key_visibility (line 159) | def _toggle_api_key_visibility(self) -> None:
    method _on_model_changed (line 165) | def _on_model_changed(self) -> None:
    method _update_capability_visibility (line 170) | def _update_capability_visibility(self, model: str) -> None:
    method _set_combo_to_model (line 178) | def _set_combo_to_model(self, model: str) -> None:
    method load_from_config (line 185) | def load_from_config(self, config: dict) -> None:
    method save_to_config (line 208) | def save_to_config(self, config: dict) -> None:
    method validate (line 215) | def validate(self) -> tuple[bool, str | None]:

FILE: gui/dialogs/kobold_tab.py
  class KoboldTab (line 5) | class KoboldTab(QWidget):
    method __init__ (line 6) | def __init__(self, parent=None):
    method load_from_config (line 23) | def load_from_config(self, config: dict) -> None:
    method save_to_config (line 26) | def save_to_config(self, config: dict) -> None:
    method validate (line 29) | def validate(self) -> tuple[bool, str | None]:

FILE: gui/dialogs/lm_studio_tab.py
  class LMStudioTab (line 20) | class LMStudioTab(QWidget):
    method __init__ (line 21) | def __init__(self, parent=None):
    method load_from_config (line 51) | def load_from_config(self, config: dict) -> None:
    method save_to_config (line 63) | def save_to_config(self, config: dict) -> None:
    method validate (line 77) | def validate(self) -> tuple[bool, str | None]:
    method _update_port_in_connection_str (line 96) | def _update_port_in_connection_str(connection_str: str, port: str) -> ...

FILE: gui/dialogs/minimax_tab.py
  class MiniMaxTab (line 5) | class MiniMaxTab(QWidget):
    method __init__ (line 6) | def __init__(self, parent=None):
    method load_from_config (line 24) | def load_from_config(self, config: dict) -> None:
    method save_to_config (line 27) | def save_to_config(self, config: dict) -> None:
    method validate (line 30) | def validate(self) -> tuple[bool, str | None]:

FILE: gui/download_model.py
  class ModelDownloadedSignal (line 11) | class ModelDownloadedSignal(QObject):
  function get_hf_token (line 25) | def get_hf_token():
  class ModelDownloader (line 37) | class ModelDownloader(QObject):
    method __init__ (line 38) | def __init__(self, model_info, model_type):
    method cleanup_incomplete_download (line 49) | def cleanup_incomplete_download(self):
    method get_model_directory_name (line 58) | def get_model_directory_name(self):
    method get_model_directory (line 64) | def get_model_directory(self):
    method get_model_url (line 69) | def get_model_url(self):
    method check_repo_type (line 74) | def check_repo_type(self, repo_id):
    method _list_repo_files (line 99) | def _list_repo_files(self, repo_id, use_token):
    method _select_patterns (line 103) | def _select_patterns(self, repo_files, allow_patterns, ignore_patterns):
    method _filter_and_size (line 142) | def _filter_and_size(self, repo_files, allow_patterns, ignore_patterns):
    method download (line 164) | def download(self, allow_patterns=None, ignore_patterns=None):
  function download_embedding_model (line 231) | def download_embedding_model(repo_id, local_dir=None):
  function download_chat_model (line 239) | def download_chat_model(repo_id, local_dir=None):

FILE: gui/main_window.py
  class DocQA_GUI (line 30) | class DocQA_GUI(QWidget):
    method __init__ (line 31) | def __init__(self):
    method set_dark_titlebar (line 41) | def set_dark_titlebar(self):
    method init_ui (line 62) | def init_ui(self):
    method init_menu (line 78) | def init_menu(self):
    method open_chat_backends_dialog (line 102) | def open_chat_backends_dialog(self):
    method open_chat_window (line 106) | def open_chat_window(self):
    method closeEvent (line 161) | def closeEvent(self, event):
  function main (line 189) | def main():

FILE: gui/metrics_bar.py
  class SystemMetrics (line 24) | class SystemMetrics:
  function is_nvidia_gpu_available (line 33) | def is_nvidia_gpu_available():
  function _shutdown_nvml (line 45) | def _shutdown_nvml():
  class MetricsStore (line 53) | class MetricsStore(QObject):
    method __init__ (line 55) | def __init__(self, buffer_size: int = 100):
    method add_metrics (line 58) | def add_metrics(self, metrics: SystemMetrics) -> None:
    method subscribe (line 61) | def subscribe(self, callback):
    method unsubscribe (line 63) | def unsubscribe(self, callback):
    method history (line 69) | def history(self) -> List[SystemMetrics]:
  class BatchCSVLogger (line 72) | class BatchCSVLogger(QObject):
    method __init__ (line 73) | def __init__(self, filepath: str, flush_interval: int = 5000):
    method __enter__ (line 85) | def __enter__(self):
    method __exit__ (line 87) | def __exit__(self, exc_type, exc_val, exc_tb):
    method log (line 89) | def log(self, metrics):
    method flush (line 91) | def flush(self):
    method close (line 98) | def close(self):
    method __del__ (line 102) | def __del__(self):
  function collect_cpu_metrics (line 108) | def collect_cpu_metrics():
  function collect_ram_metrics (line 116) | def collect_ram_metrics():
  function collect_gpu_metrics (line 120) | def collect_gpu_metrics(handle):
  function collect_power_metrics (line 128) | def collect_power_metrics(handle):
  class MetricsCollectorThread (line 148) | class MetricsCollectorThread(QThread):
    method __init__ (line 150) | def __init__(self, interval: int = 200):
    method _collect_once (line 154) | def _collect_once(self):
    method run (line 167) | def run(self):
    method stop (line 173) | def stop(self):
  class BaseVisualization (line 177) | class BaseVisualization(QWidget):
    method __init__ (line 180) | def __init__(self, metrics_store: MetricsStore):
    method _update_widget (line 185) | def _update_widget(self, widget, value):
    method _format_label (line 187) | def _format_label(self, prefix, value):
    method update_metrics (line 189) | def update_metrics(self, m: SystemMetrics):
    method cleanup (line 200) | def cleanup(self):
  function color_for (line 203) | def color_for(name: str) -> str:
  class BarVisualization (line 206) | class BarVisualization(BaseVisualization):
    method __init__ (line 207) | def __init__(self, metrics_store: MetricsStore):
    method _update_widget (line 220) | def _update_widget(self, widget, value):
    method _format_label (line 222) | def _format_label(self, prefix, value):
    method initUI (line 224) | def initUI(self):
    method add_metric_to_grid (line 234) | def add_metric_to_grid(self, label_text, color, grid_layout, row):
    method create_progress_bar (line 242) | def create_progress_bar(self, color):
  function gradient_pixmap (line 251) | def gradient_pixmap(color: str, height: int) -> QPixmap:
  class Sparkline (line 264) | class Sparkline(QWidget):
    method __init__ (line 265) | def __init__(self, max_values=125, color="#0074D9"):
    method add_value (line 270) | def add_value(self, value):
    method paintEvent (line 273) | def paintEvent(self, event):
  class SparklineVisualization (line 309) | class SparklineVisualization(BaseVisualization):
    method __init__ (line 310) | def __init__(self, metrics_store: MetricsStore):
    method _update_widget (line 323) | def _update_widget(self, widget, value):
    method initUI (line 325) | def initUI(self):
  class Speedometer (line 354) | class Speedometer(QWidget):
    method __init__ (line 355) | def __init__(self, min_value=0, max_value=100, colors=None):
    method set_value (line 362) | def set_value(self, value):
    method get_color_at_angle (line 365) | def get_color_at_angle(self, angle):
    method paintEvent (line 381) | def paintEvent(self, event):
  class SpeedometerVisualization (line 408) | class SpeedometerVisualization(BaseVisualization):
    method __init__ (line 409) | def __init__(self, metrics_store: MetricsStore):
    method _update_widget (line 422) | def _update_widget(self, widget, value):
    method initUI (line 424) | def initUI(self):
  function arc_background (line 453) | def arc_background(w: int, h: int) -> QPixmap:
  class ArcGraph (line 465) | class ArcGraph(QWidget):
    method __init__ (line 466) | def __init__(self, color="#0074D9"):
    method set_value (line 471) | def set_value(self, value):
    method paintEvent (line 474) | def paintEvent(self, event):
  class ArcGraphVisualization (line 492) | class ArcGraphVisualization(BaseVisualization):
    method __init__ (line 493) | def __init__(self, metrics_store: MetricsStore):
    method _update_widget (line 506) | def _update_widget(self, widget, value):
    method initUI (line 508) | def initUI(self):
  class VizType (line 535) | class VizType(IntEnum):
  class MetricsWidget (line 548) | class MetricsWidget(QWidget):
    method __init__ (line 549) | def __init__(self, parent=None):
    method init_ui (line 558) | def init_ui(self):
    method contextMenuEvent (line 563) | def contextMenuEvent(self, event):
    method change_visualization (line 586) | def change_visualization(self, kind: VizType):
    method start_metrics_collector (line 597) | def start_metrics_collector(self):
    method stop_metrics_collector (line 601) | def stop_metrics_collector(self):
    method cleanup (line 605) | def cleanup(self):
    method closeEvent (line 617) | def closeEvent(self, event):

FILE: gui/tabs.py
  function create_tabs (line 9) | def create_tabs():

FILE: gui/tabs_databases/create.py
  class VectorDBWorker (line 21) | class VectorDBWorker(QThread):
    method __init__ (line 35) | def __init__(self, database_name, parent=None):
    method run (line 41) | def run(self):
    method cancel (line 88) | def cancel(self):
  class CustomFileSystemModel (line 94) | class CustomFileSystemModel(QFileSystemModel):
    method __init__ (line 95) | def __init__(self, parent=None):
  class DatabasesTab (line 100) | class DatabasesTab(QWidget):
    method __init__ (line 104) | def __init__(self):
    method _validation_failed (line 166) | def _validation_failed(self, message: str):
    method refresh_model_combobox (line 170) | def refresh_model_combobox(self, index):
    method update_model_combobox (line 177) | def update_model_combobox(self, model_name, model_type):
    method populate_model_combobox (line 182) | def populate_model_combobox(self):
    method sync_combobox_with_config (line 195) | def sync_combobox_with_config(self):
    method on_model_selected (line 212) | def on_model_selected(self, index):
    method create_group_box (line 237) | def create_group_box(self, title, directory_name):
    method _refresh_docs_model (line 247) | def _refresh_docs_model(self):
    method _refresh_info_label (line 253) | def _refresh_info_label(self):
    method _compute_precision_str (line 286) | def _compute_precision_str(self, config, use_half):
    method setup_directory_view (line 319) | def setup_directory_view(self, directory_name):
    method on_double_click (line 341) | def on_double_click(self, index):
    method on_context_menu (line 347) | def on_context_menu(self, point):
    method on_delete_file (line 355) | def on_delete_file(self, tree_view):
    method on_create_db_clicked (line 363) | def on_create_db_clicked(self):
    method start_database_creation (line 407) | def start_database_creation(self, database_name, model_name, skip_ocr):
    method on_cancel_db_clicked (line 424) | def on_cancel_db_clicked(self):
    method on_worker_finished (line 431) | def on_worker_finished(self, success: bool, exit_code: int, message: s...
    method update_config_with_database_name (line 459) | def update_config_with_database_name(self):
    method reenable_create_db_button (line 477) | def reenable_create_db_button(self):
    method closeEvent (line 491) | def closeEvent(self, event):
    method toggle_group_box (line 497) | def toggle_group_box(self, group_box, checked):
    method adjust_stretch (line 501) | def adjust_stretch(self):

FILE: gui/tabs_databases/manage.py
  class SQLiteTableModel (line 17) | class SQLiteTableModel(QAbstractTableModel):
    method __init__ (line 18) | def __init__(self, data=None):
    method data (line 23) | def data(self, index, role):
    method rowCount (line 30) | def rowCount(self, index):
    method columnCount (line 33) | def columnCount(self, index):
    method headerData (line 36) | def headerData(self, section, orientation, role):
  class RefreshingComboBox (line 42) | class RefreshingComboBox(QComboBox):
    method __init__ (line 43) | def __init__(self, parent=None):
    method showPopup (line 49) | def showPopup(self):
  class ManageDatabasesTab (line 69) | class ManageDatabasesTab(QWidget):
    method __init__ (line 70) | def __init__(self):
    method load_created_databases (line 93) | def load_created_databases(self):
    method display_no_databases_message (line 101) | def display_no_databases_message(self):
    method create_group_box_with_table_view (line 107) | def create_group_box_with_table_view(self, title):
    method update_table_view_and_info_label (line 126) | def update_table_view_and_info_label(self, index):
    method on_double_click (line 172) | def on_double_click(self, index):
    method create_buttons (line 183) | def create_buttons(self):
    method delete_selected_database (line 188) | def delete_selected_database(self):
    method refresh_pull_down_menu (line 246) | def refresh_pull_down_menu(self):
    method show_context_menu (line 259) | def show_context_menu(self, position):
    method delete_selected_file (line 267) | def delete_selected_file(self):

FILE: gui/tabs_databases/query.py
  class SubmitStrategy (line 34) | class SubmitStrategy(ABC):
    method __init__ (line 35) | def __init__(self, tab):
    method submit (line 39) | def submit(self, question: str, db_name: str) -> None: ...
  class LocalModelStrategy (line 41) | class LocalModelStrategy(SubmitStrategy):
    method submit (line 42) | def submit(self, question, db_name):
  class LMStudioStrategy (line 51) | class LMStudioStrategy(SubmitStrategy):
    method submit (line 52) | def submit(self, question, db_name):
  class ChatGPTStrategy (line 61) | class ChatGPTStrategy(SubmitStrategy):
    method submit (line 62) | def submit(self, question, db_name):
  class MiniMaxStrategy (line 70) | class MiniMaxStrategy(SubmitStrategy):
    method submit (line 71) | def submit(self, question, db_name):
  class KoboldStrategy (line 80) | class KoboldStrategy(SubmitStrategy):
    method submit (line 81) | def submit(self, question, db_name):
  class ChunksOnlyStrategy (line 89) | class ChunksOnlyStrategy(SubmitStrategy):
    method submit (line 90) | def submit(self, question, db_name):
  class ThinkingIndicator (line 96) | class ThinkingIndicator(QProgressBar):
    method __init__ (line 97) | def __init__(self, parent=None):
  class ChunksOnlyThread (line 105) | class ChunksOnlyThread(QThread):
    method __init__ (line 108) | def __init__(self, query, database_name):
    method run (line 115) | def run(self):
    method stop (line 176) | def stop(self):
  function run_tts_in_process (line 193) | def run_tts_in_process(config_path, input_text_file):
  class RefreshingComboBox (line 199) | class RefreshingComboBox(QComboBox):
    method __init__ (line 200) | def __init__(self, parent=None):
    method showPopup (line 203) | def showPopup(self):
  class GuiSignals (line 216) | class GuiSignals(QObject):
  class CustomTextBrowser (line 223) | class CustomTextBrowser(QTextBrowser):
    method __init__ (line 224) | def __init__(self, parent=None):
    method doSetSource (line 228) | def doSetSource(self, name, type):
  class DatabaseQueryTab (line 237) | class DatabaseQueryTab(QWidget):
    method __init__ (line 238) | def __init__(self):
    method initWidgets (line 255) | def initWidgets(self):
    method _strategy_for_source (line 395) | def _strategy_for_source(self, source: str) -> SubmitStrategy:
    method setup_signals (line 409) | def setup_signals(self):
    method _render_html (line 418) | def _render_html(self):
    method toggle_thinking_visibility (line 435) | def toggle_thinking_visibility(self):
    method update_token_count_label (line 438) | def update_token_count_label(self, token_count_string):
    method on_model_source_changed (line 441) | def on_model_source_changed(self, text):
    method load_created_databases (line 452) | def load_created_databases(self):
    method on_submit_button_clicked (line 460) | def on_submit_button_clicked(self):
    method display_chunks (line 490) | def display_chunks(self, chunks):
    method on_database_query_finished (line 493) | def on_database_query_finished(self):
    method eject_model (line 496) | def eject_model(self):
    method on_model_loaded (line 508) | def on_model_loaded(self):
    method on_model_unloaded (line 512) | def on_model_unloaded(self):
    method display_citations_in_widget (line 516) | def display_citations_in_widget(self, citations):
    method on_copy_response_clicked (line 523) | def on_copy_response_clicked(self):
    method on_bark_button_clicked (line 532) | def on_bark_button_clicked(self):
    method run_tts_module (line 578) | def run_tts_module(self):
    method toggle_recording (line 582) | def toggle_recording(self):
    method update_response_lm_studio (line 591) | def update_response_lm_studio(self, response_chunk):
    method update_response_local_model (line 598) | def update_response_local_model(self, chunk: str):
    method show_error_message (line 613) | def show_error_message(self, error_message):
    method on_submission_finished (line 625) | def on_submission_finished(self):
    method update_transcription (line 638) | def update_transcription(self, transcription_text):
    method cleanup (line 641) | def cleanup(self):

FILE: gui/tabs_models/models.py
  class VectorModelsTab (line 13) | class VectorModelsTab(QWidget):
    method __init__ (line 17) | def __init__(self, parent=None):
    method initiate_model_download (line 152) | def initiate_model_download(self):
    method _reset_download_button (line 178) | def _reset_download_button(self):
    method _on_download_failed (line 182) | def _on_download_failed(self, message):
    method update_model_downloaded_status (line 186) | def update_model_downloaded_status(self, model_name, model_type):
    method refresh_gui (line 209) | def refresh_gui(self):
    method open_link (line 214) | def open_link(self, url):

FILE: gui/tabs_settings/database_create.py
  class ChunkSettingsTab (line 8) | class ChunkSettingsTab(QWidget):
    method __init__ (line 9) | def __init__(self):
    method update_config (line 108) | def update_config(self):

FILE: gui/tabs_settings/database_query.py
  class DatabaseSettingsTab (line 18) | class DatabaseSettingsTab(QWidget):
    method __init__ (line 19) | def __init__(self):
    method update_config (line 137) | def update_config(self):
    method reset_search_term (line 261) | def reset_search_term(self):

FILE: gui/tabs_settings/settings.py
  function update_all_configs (line 17) | def update_all_configs(configs):
  function adjust_stretch (line 34) | def adjust_stretch(groups, layout):
  class GuiSettingsTab (line 39) | class GuiSettingsTab(QWidget):
    method __init__ (line 40) | def __init__(self):
    method toggle_group (line 97) | def toggle_group(self, group, checked):
    method toggle_tts_group (line 102) | def toggle_tts_group(self, ttsSettings, checked):
    method toggle_vision_group (line 106) | def toggle_vision_group(self, visionSettings, checked):
    method update_all_settings (line 110) | def update_all_settings(self):

FILE: gui/tabs_settings/tts.py
  class TTSSettingsTab (line 25) | class TTSSettingsTab(QWidget):
    method __init__ (line 109) | def __init__(self):
    method _build_ui (line 116) | def _build_ui(self):
    method _config_path (line 159) | def _config_path(self) -> Path:
    method _load_from_yaml (line 162) | def _load_from_yaml(self):
    method _save_to_yaml (line 211) | def _save_to_yaml(self):
    method _try_read_yaml (line 276) | def _try_read_yaml(self):
    method _update_visible_extras (line 286) | def _update_visible_extras(self):
    method _update_kyutai_voice_visibility (line 307) | def _update_kyutai_voice_visibility(self):
    method _find_key_by_value (line 315) | def _find_key_by_value(d: dict, value: str | None):
    method _disable_voice_cloning_item (line 322) | def _disable_voice_cloning_item(cmb: QComboBox):

FILE: gui/tabs_settings/vision.py
  function _read_cfg (line 11) | def _read_cfg() -> dict:
  function _write_cfg (line 22) | def _write_cfg(cfg: dict) -> None:
  function is_cuda_available (line 27) | def is_cuda_available():
  function get_cuda_capability (line 31) | def get_cuda_capability():
  class VisionSettingsTab (line 37) | class VisionSettingsTab(QWidget):
    method __init__ (line 39) | def __init__(self):
    method populate_model_combobox (line 90) | def populate_model_combobox(self):
    method _apply_model_to_labels (line 94) | def _apply_model_to_labels(self, model_name: str):

FILE: gui/tabs_tools/misc.py
  class WorkerThread (line 8) | class WorkerThread(QThread):
    method __init__ (line 11) | def __init__(self, function, *args, **kwargs):
    method run (line 17) | def run(self):
  class MiscTab (line 25) | class MiscTab(QWidget):
    method __init__ (line 26) | def __init__(self):
    method set_buttons_enabled (line 97) | def set_buttons_enabled(self, enabled, buttons):
    method set_button_text (line 101) | def set_button_text(self, button: QPushButton, text: str):
    method backup_all_databases (line 104) | def backup_all_databases(self):
    method on_backup_finished (line 123) | def on_backup_finished(self, success):
    method restore_backup (line 131) | def restore_backup(self):
    method on_restore_finished (line 150) | def on_restore_finished(self, success):
    method chart_gpus (line 158) | def chart_gpus(self):
    method reset_chart_button (line 180) | def reset_chart_button(self):
    method chart_chat_models (line 184) | def chart_chat_models(self):
    method reset_chart_chat_models_button (line 199) | def reset_chart_chat_models_button(self):
    method chart_vision_models (line 203) | def chart_vision_models(self):
    method reset_chart_vision_models_button (line 218) | def reset_chart_vision_models_button(self):
    method chart_vector_models (line 222) | def chart_vector_models(self):
    method reset_chart_vector_models_button (line 237) | def reset_chart_vector_models_button(self):

FILE: gui/tabs_tools/ocr.py
  function get_pdf_page_count (line 11) | def get_pdf_page_count(pdf_path):
  function run_ocr_process (line 19) | def run_ocr_process(pdf_path, backend):
  class OcrWorkerThread (line 29) | class OcrWorkerThread(QThread):
    method __init__ (line 32) | def __init__(self, pdf_path, backend, parent=None):
    method run (line 37) | def run(self):
  class OCRToolSettingsTab (line 43) | class OCRToolSettingsTab(QWidget):
    method __init__ (line 48) | def __init__(self):
    method create_layout (line 55) | def create_layout(self):
    method setButtons (line 92) | def setButtons(self, enabled):
    method select_pdf_file (line 99) | def select_pdf_file(self):
    method show_error_message (line 115) | def show_error_message(self, message):
    method show_success_message (line 120) | def show_success_message(self):
    method start_ocr_process (line 149) | def start_ocr_process(self):
    method ocr_finished (line 170) | def ocr_finished(self, success, message, elapsed_time):
    method _show_completion_message (line 183) | def _show_completion_message(self, success, message):

FILE: gui/tabs_tools/scrape.py
  function _load_rate_limited_set (line 31) | def _load_rate_limited_set() -> set[str]:
  function _save_rate_limited_set (line 41) | def _save_rate_limited_set(names: set[str]) -> None:
  function _mark_rate_limited_persistent (line 46) | def _mark_rate_limited_persistent(name: str) -> None:
  function _clear_rate_limited_persistent (line 52) | def _clear_rate_limited_persistent(name: str) -> None:
  class ScrapeRowWidget (line 58) | class ScrapeRowWidget(QWidget):
    method __init__ (line 61) | def __init__(self, doc_name: str, folder_path: str, on_cancel, on_open):
    method _set_label (line 85) | def _set_label(self, status_text: str, count: int, color: str):
    method update_count (line 92) | def update_count(self, count: int):
    method mark_completed (line 95) | def mark_completed(self, count: int):
    method mark_cancelled (line 99) | def mark_cancelled(self, count: int):
    method mark_rate_limited (line 103) | def mark_rate_limited(self, count: int):
    method _cancel_clicked (line 110) | def _cancel_clicked(self):
    method _open_clicked (line 115) | def _open_clicked(self):
    method _current_count (line 118) | def _current_count(self) -> int:
  class ScrapeDocumentationTab (line 127) | class ScrapeDocumentationTab(QWidget):
    method __init__ (line 128) | def __init__(self) -> None:
    method init_ui (line 138) | def init_ui(self) -> None:
    method _refresh_summary (line 167) | def _refresh_summary(self) -> None:
    method _restore_rate_limited_rows (line 174) | def _restore_rate_limited_rows(self) -> None:
    method populate_combo_box (line 206) | def populate_combo_box(self) -> None:
    method start_scraping (line 226) | def start_scraping(self) -> None:
    method update_status (line 339) | def update_status(self, doc_name: str, status: str) -> None:
    method _on_worker_finished (line 349) | def _on_worker_finished(self, doc_name: str, was_cancelled: bool, was_...
    method _on_thread_finished (line 377) | def _on_thread_finished(self, doc_name: str) -> None:
    method cancel_scrape (line 382) | def cancel_scrape(self, doc_name: str) -> None:
    method show_error (line 391) | def show_error(self, message: str) -> None:
    method open_folder (line 394) | def open_folder(self, folder_path: str) -> None:

FILE: gui/tabs_tools/tools.py
  class RestoreBackupThread (line 11) | class RestoreBackupThread(QThread):
    method run (line 13) | def run(self):
  class BackupDatabaseThread (line 21) | class BackupDatabaseThread(QThread):
    method run (line 23) | def run(self):
  class GuiSettingsTab (line 31) | class GuiSettingsTab(QWidget):
    method __init__ (line 32) | def __init__(self):
    method adjust_stretch (line 55) | def adjust_stretch(self):

FILE: gui/tabs_tools/transcribe.py
  class TranscriberToolSettingsTab (line 13) | class TranscriberToolSettingsTab(QWidget):
    method __init__ (line 16) | def __init__(self):
    method set_buttons_enabled (line 21) | def set_buttons_enabled(self, enabled):
    method create_layout (line 25) | def create_layout(self):
    method populate_model_combo (line 82) | def populate_model_combo(self):
    method update_slider_label (line 98) | def update_slider_label(self, value):
    method update_config_file (line 101) | def update_config_file(self):
    method select_audio_file (line 105) | def select_audio_file(self):
    method start_transcription (line 115) | def start_transcription(self):

FILE: gui/tabs_tools/vision.py
  function _load_cfg (line 28) | def _load_cfg() -> dict:
  class ModelComparisonProgressDialog (line 39) | class ModelComparisonProgressDialog(QDialog):
    method __init__ (line 45) | def __init__(self, model_names, parent=None):
    method _set_row (line 77) | def _set_row(self, index, icon, suffix=""):
    method on_model_started (line 84) | def on_model_started(self, index, name):
    method on_model_completed (line 87) | def on_model_completed(self, index, name, elapsed):
    method on_model_failed (line 90) | def on_model_failed(self, index, name):
    method mark_finished (line 93) | def mark_finished(self):
    method was_cancelled (line 98) | def was_cancelled(self):
    method _on_action_clicked (line 101) | def _on_action_clicked(self):
    method closeEvent (line 106) | def closeEvent(self, event):
  class ModelSelectionDialog (line 112) | class ModelSelectionDialog(QDialog):
    method __init__ (line 113) | def __init__(self, models, parent=None):
    method get_selected_models (line 137) | def get_selected_models(self):
  class ImageProcessorThread (line 141) | class ImageProcessorThread(QThread):
    method run (line 145) | def run(self):
  class MultiModelProcessorThread (line 169) | class MultiModelProcessorThread(QThread):
    method __init__ (line 177) | def __init__(self, image_path, selected_models):
    method cancel (line 183) | def cancel(self):
    method run (line 186) | def run(self):
  class VisionToolSettingsTab (line 246) | class VisionToolSettingsTab(QWidget):
    method __init__ (line 247) | def __init__(self):
    method confirmationBeforeProcessing (line 267) | def confirmationBeforeProcessing(self):
    method startProcessing (line 285) | def startProcessing(self):
    method onProcessingFinished (line 292) | def onProcessingFinished(self, documents):
    method onProcessingError (line 298) | def onProcessingError(self, error_msg):
    method selectSingleImage (line 303) | def selectSingleImage(self):
    method cancelProcessing (line 344) | def cancelProcessing(self):
    method onMultiModelProcessingFinished (line 348) | def onMultiModelProcessingFinished(self, results):
    method onMultiModelProcessingError (line 358) | def onMultiModelProcessingError(self, error_msg):
    method extract_page_content (line 365) | def extract_page_content(self, documents):
    method save_page_contents (line 390) | def save_page_contents(self, contents):
    method save_comparison_results (line 408) | def save_comparison_results(self, image_path, results):
    method open_file (line 467) | def open_file(self, file_path):

FILE: modules/kokoro.py
  class KokoroTTS (line 13) | class KokoroTTS:
    method __init__ (line 28) | def __init__(self, repo_path: str):
    method _load_model_and_voice (line 49) | def _load_model_and_voice(self, voice_name: str):
    method _drain_queue (line 75) | def _drain_queue(q):
    method stop (line 82) | def stop(self):
    method _process_sentences (line 88) | def _process_sentences(self, speed: float, force_accent: Optional[str]):
    method _play_audio (line 136) | def _play_audio(self):
    method speak (line 177) | def speak(self,

FILE: modules/ocr.py
  class OCRProcessor (line 24) | class OCRProcessor(ABC):
    method __init__ (line 25) | def __init__(self, zoom: int = 2, progress_queue: Queue = None):
    method convert_page_to_image (line 35) | def convert_page_to_image(self, page) -> Image.Image:
    method process_page (line 42) | def process_page(self, page_num: int, pdf_path: str) -> Tuple[int, str]:
    method initialize (line 46) | def initialize(self):
    method clean_text (line 50) | def clean_text(self, text: str) -> str:
    method validate_pdf (line 53) | def validate_pdf(self, pdf_path: Path) -> bool:
    method process_document (line 63) | def process_document(self, pdf_path: Path, output_path: Path = None):
    method get_optimal_threads (line 92) | def get_optimal_threads() -> int:
  class TesseractOCR (line 95) | class TesseractOCR(OCRProcessor):
    method __init__ (line 96) | def __init__(self, zoom: int = 2, progress_queue: Queue = None):
    method initialize (line 102) | def initialize(self):
    method clean_text (line 112) | def clean_text(self, text: str) -> str:
    method cleanup (line 115) | def cleanup(self):
    method process_document (line 120) | def process_document(self, pdf_path: Path, output_path: Path = None):
    method process_page (line 152) | def process_page(self, page_num: int, pdf_path: str) -> Tuple[int, str]:
    method optimize_final_pdf (line 196) | def optimize_final_pdf(self, original_pdf_path: Path, ocr_pdf_path: Pa...
    method cleanup_temp_pdfs (line 218) | def cleanup_temp_pdfs(self):
  function _process_documents_worker (line 227) | def _process_documents_worker(pdf_paths: List[Path], backend: str, model...
  function process_documents (line 243) | def process_documents(pdf_paths: Union[Path, List[Path]], backend: str =...

FILE: modules/process_images.py
  function get_best_device (line 47) | def get_best_device():
  function check_for_images (line 50) | def check_for_images(image_dir: Path) -> bool:
  function run_loader_in_process (line 59) | def run_loader_in_process(loader_func):
  function choose_image_loader (line 68) | def choose_image_loader(model_config: dict | None = None):
  class BaseLoader (line 102) | class BaseLoader:
    method __init__ (line 103) | def __init__(self, config):
    method detect_dtype (line 111) | def detect_dtype():
    method normalize_response (line 116) | def normalize_response(text):
    method initialize_model_and_tokenizer (line 119) | def initialize_model_and_tokenizer(self):
    method process_images (line 122) | def process_images(self):
    method process_single_image (line 151) | def process_single_image(self, raw_image):
  class loader_internvl (line 155) | class loader_internvl(BaseLoader):
    method initialize_model_and_tokenizer (line 156) | def initialize_model_and_tokenizer(self):
    method find_closest_aspect_ratio (line 214) | def find_closest_aspect_ratio(self, aspect_ratio, ratios, w, h, sz):
    method _build_transform (line 227) | def _build_transform(self, size):
    method dynamic_preprocess (line 237) | def dynamic_preprocess(self, img, min_num=1, max_num=12, image_size=44...
    method _prepare_image (line 263) | def _prepare_image(self, raw_image, input_size=448, max_num=24):
    method process_single_image (line 270) | def process_single_image(self, raw_image):
  class loader_granite (line 286) | class loader_granite(BaseLoader):
    method initialize_model_and_tokenizer (line 288) | def initialize_model_and_tokenizer(self):
    method process_single_image (line 411) | def process_single_image(self, raw_image):
  class loader_qwenvl (line 430) | class loader_qwenvl(BaseLoader):
    method initialize_model_and_tokenizer (line 431) | def initialize_model_and_tokenizer(self):
    method process_single_image (line 504) | def process_single_image(self, raw_image):
  class loader_glmv4_thinking (line 532) | class loader_glmv4_thinking(BaseLoader):
    method initialize_model_and_tokenizer (line 539) | def initialize_model_and_tokenizer(self):
    method _cap_pixels_for_glm4v (line 573) | def _cap_pixels_for_glm4v(self, pil_img, max_pixels_2d, divisor=28):
    method process_single_image (line 593) | def process_single_image(self, raw_image):
  class loader_liquidvl (line 643) | class loader_liquidvl(BaseLoader):
    method initialize_model_and_tokenizer (line 644) | def initialize_model_and_tokenizer(self):
    method process_single_image (line 685) | def process_single_image(self, raw_image):

FILE: modules/scraper.py
  function _strip_trailing_version (line 52) | def _strip_trailing_version(path: str) -> str:
  function _strip_embedded_cruft (line 67) | def _strip_embedded_cruft(content):
  class BaseScraper (line 101) | class BaseScraper:
    method __init__ (line 102) | def __init__(self, url, folder):
    method process_html (line 111) | def process_html(self, soup):
    method extract_main_content (line 127) | def extract_main_content(self, soup):
  class SelectorScraper (line 156) | class SelectorScraper(BaseScraper):
    method __init__ (line 157) | def __init__(self, url, folder, selector_key):
    method extract_main_content (line 167) | def extract_main_content(self, soup):
  class PymupdfScraper (line 171) | class PymupdfScraper(BaseScraper):
    method extract_main_content (line 172) | def extract_main_content(self, soup):
  function _mintlify_unwrap (line 207) | def _mintlify_unwrap(md, name):
  function _mintlify_heading (line 219) | def _mintlify_heading(md, name):
  function _mintlify_quote (line 236) | def _mintlify_quote(md, name):
  function _mintlify_normalize_fences (line 249) | def _mintlify_normalize_fences(md_text):
  function render_mintlify_markdown (line 265) | def render_mintlify_markdown(md_text):
  class MintlifyScraper (line 282) | class MintlifyScraper(BaseScraper):
    method collect_seed_urls (line 283) | async def collect_seed_urls(self, session):
    method fetch_url_for (line 307) | def fetch_url_for(self, url):
    method transform_response (line 313) | def transform_response(self, text, url):
    method extract_main_content (line 320) | def extract_main_content(self, soup):
  class DivIdContentSecondScraper (line 324) | class DivIdContentSecondScraper(BaseScraper):
    method extract_main_content (line 325) | def extract_main_content(self, soup):
  class PropCacheScraper (line 332) | class PropCacheScraper(BaseScraper):
    method __init__ (line 333) | def __init__(self, url, folder):
    method extract_main_content (line 344) | def extract_main_content(self, soup):
  class FileDownloader (line 348) | class FileDownloader(BaseScraper):
    method extract_main_content (line 350) | def extract_main_content(self, soup):
    method save_file (line 353) | async def save_file(self, content: bytes, url: str, save_dir: str):
  class ScraperRegistry (line 363) | class ScraperRegistry:
    method get_scraper (line 374) | def get_scraper(cls, scraper_name):
  class ScraperWorker (line 383) | class ScraperWorker(QObject):
    method __init__ (line 389) | def __init__(self, url, folder, scraper_class=BaseScraper, name="", re...
    method run (line 406) | def run(self):
    method _finalize_clean_run (line 422) | def _finalize_clean_run(self):
    method cancel (line 439) | def cancel(self):
    method count_saved_files (line 444) | def count_saved_files(self):
    method crawl_domain (line 447) | async def crawl_domain(
    method _build_resume_queue (line 530) | def _build_resume_queue(self, log_file):
    method fetch (line 560) | async def fetch(
    method save_html (line 644) | async def save_html(self, content, url, save_dir, links=None):
    method sanitize_filename (line 684) | def sanitize_filename(self, url: str) -> str:
    method log_failed_url (line 725) | async def log_failed_url(self, url, log_file):
    method extract_links (line 732) | def extract_links(
    method is_valid_url (line 762) | def is_valid_url(self, url, base_domain, acceptable_domain_extension):
    method cleanup (line 778) | def cleanup(self):

FILE: modules/transcribe.py
  class WhisperTranscriber (line 21) | class WhisperTranscriber:
    method __init__ (line 22) | def __init__(self, model_key, batch_size):
    method start_transcription_process (line 62) | def start_transcription_process(self, audio_file):
    method transcribe_and_create_document (line 71) | def transcribe_and_create_document(self):
    method convert_to_wav (line 105) | def convert_to_wav(self, audio_file):
    method is_correct_format (line 121) | def is_correct_format(self, audio_file):
    method convert_with_av (line 131) | def convert_with_av(self, audio_file, output_path):
    method transcribe (line 159) | def transcribe(self, model, files, lang_codes=['en'], tasks=['transcri...
    method create_document_object (line 168) | def create_document_object(self, transcription_text, audio_file_path):

FILE: modules/tts.py
  class BaseAudio (line 24) | class BaseAudio:
    method __init__ (line 25) | def __init__(self):
    method load_config (line 34) | def load_config(self, config_file, section):
    method initialize_device (line 43) | def initialize_device(self):
    method play_audio_from_queue (line 49) | def play_audio_from_queue(self):
    method run (line 69) | def run(self, input_text_file):
    method stop (line 90) | def stop(self):
  class ChatterboxAudio (line 95) | class ChatterboxAudio(BaseAudio):
    method __init__ (line 101) | def __init__(self):
    method _select_device (line 136) | def _select_device(self, pref):
    method _apply_voice_modifications (line 155) | def _apply_voice_modifications(wav, sr, pitch_factor=1.0, speed_factor...
    method process_text_to_audio (line 182) | def process_text_to_audio(self, sentences):
  class BarkAudio (line 222) | class BarkAudio(BaseAudio):
    method __init__ (line 223) | def __init__(self):
    method initialize_model_and_processor (line 229) | def initialize_model_and_processor(self):
    method process_text_to_audio (line 246) | def process_text_to_audio(self, sentences):
  class WhisperSpeechAudio (line 271) | class WhisperSpeechAudio(BaseAudio):
    method __init__ (line 272) | def __init__(self):
    method get_whisper_speech_models (line 285) | def get_whisper_speech_models(self):
    method initialize_model (line 294) | def initialize_model(self):
    method process_text_to_audio (line 320) | def process_text_to_audio(self, sentences):
    method run (line 336) | def run(self, input_text_file):
  class ChatTTSAudio (line 341) | class ChatTTSAudio(BaseAudio):
    method __init__ (line 342) | def __init__(self):
    method process_text_to_audio (line 382) | def process_text_to_audio(self, sentences):
  class GoogleTTSAudio (line 420) | class GoogleTTSAudio:
    method __init__ (line 422) | def __init__(self, lang='en', slow=False, tld='com', silence_threshold...
    method run (line 429) | def run(self, input_text_file):
    method preprocess_text (line 468) | def preprocess_text(text):
    method tokenize_and_minimize (line 475) | def tokenize_and_minimize(text):
    method trim_silence (line 500) | def trim_silence(self, audio, samplerate):
  class KyutaiAudio (line 529) | class KyutaiAudio(BaseAudio):
    method __init__ (line 535) | def __init__(self):
    method create_checkpoint_info_from_cache (line 550) | def create_checkpoint_info_from_cache(self, downloaded_paths, raw_conf...
    method initialize_model (line 584) | def initialize_model(self):
    method setup_voice_conditioning (line 656) | def setup_voice_conditioning(self):
    method generate_speech_for_sentence (line 683) | def generate_speech_for_sentence(self, sentence):
    method process_text_to_audio (line 709) | def process_text_to_audio(self, sentences):
  class KyutaiPocketAudio (line 729) | class KyutaiPocketAudio(BaseAudio):
    method __init__ (line 734) | def __init__(self):
    method initialize_model (line 749) | def initialize_model(self):
    method generate_speech_for_sentence (line 780) | def generate_speech_for_sentence(self, sentence):
    method process_text_to_audio (line 794) | def process_text_to_audio(self, sentences):
  function run_tts (line 812) | def run_tts(config_path, input_text_file):

FILE: modules/voice_recorder.py
  function get_logical_core_count (line 13) | def get_logical_core_count():
  class TranscriptionThread (line 21) | class TranscriptionThread(QThread):
    method __init__ (line 24) | def __init__(self, audio_file, voice_recorder):
    method run (line 29) | def run(self):
  class RecordingThread (line 60) | class RecordingThread(QThread):
    method __init__ (line 61) | def __init__(self, voice_recorder):
    method run (line 65) | def run(self):
  class VoiceRecorder (line 68) | class VoiceRecorder:
    method __init__ (line 69) | def __init__(self, gui_instance, channels=1, rate=16000, chunk=1024):
    method record_audio (line 76) | def record_audio(self):
    method save_audio (line 92) | def save_audio(self):
    method start_recording (line 113) | def start_recording(self):
    method stop_recording (line 119) | def stop_recording(self):

FILE: setup_windows.py
  function has_nvidia_gpu (line 33) | def has_nvidia_gpu():
  function tkinter_message_box (line 48) | def tkinter_message_box(title, message, type="info", yes_no=False):
  function check_python_version_and_confirm (line 62) | def check_python_version_and_confirm():
  function is_nvidia_gpu_installed (line 78) | def is_nvidia_gpu_installed():
  function manual_installation_confirmation (line 85) | def manual_installation_confirmation():
  function upgrade_pip_setuptools_wheel (line 111) | def upgrade_pip_setuptools_wheel(max_retries=5, delay=3):
  function pip_install (line 141) | def pip_install(library, with_deps=False, max_retries=5, delay=3):
  function install_libraries (line 161) | def install_libraries(libraries):
  function install_libraries_with_deps (line 175) | def install_libraries_with_deps(libraries):
  function create_directory_structure (line 249) | def create_directory_structure():
  function update_config_yaml (line 265) | def update_config_yaml():

FILE: tools/check_packages.py
  function _get_latest_version (line 19) | def _get_latest_version(package_name):
  class OutdatedPackagesWorker (line 36) | class OutdatedPackagesWorker(QObject):
    method run (line 39) | def run(self):
  class VersionsWorker (line 53) | class VersionsWorker(QObject):
    method __init__ (line 56) | def __init__(self, package_name):
    method run (line 59) | def run(self):
    method get_all_versions (line 65) | def get_all_versions(self, package_name):
  class PipWorker (line 88) | class PipWorker(QObject):
    method __init__ (line 91) | def __init__(self, package_name, selected_version):
    method run (line 95) | def run(self):
  class ImportWorker (line 107) | class ImportWorker(QObject):
    method __init__ (line 110) | def __init__(self, packages):
    method run (line 113) | def run(self):
  class LatestVersionWorker (line 124) | class LatestVersionWorker(QObject):
    method __init__ (line 127) | def __init__(self, package_name):
    method run (line 130) | def run(self):
  class CompareDependenciesDialog (line 137) | class CompareDependenciesDialog(QDialog):
    method __init__ (line 138) | def __init__(self, parent, package_name, current_version, latest_versi...
    method update_display (line 223) | def update_display(self):
    method filter_extras (line 267) | def filter_extras(self, deps):
  class PackageChecker (line 273) | class PackageChecker(QMainWindow):
    method __init__ (line 274) | def __init__(self):
    method verify_installation (line 325) | def verify_installation(self, package_name, expected_version):
    method _is_package_available (line 332) | def _is_package_available(self, pkg_name: str):
    method check_package (line 354) | def check_package(self):
    method check_all_packages (line 371) | def check_all_packages(self):
    method check_outdated_packages (line 405) | def check_outdated_packages(self):
    method on_outdated_packages_checked (line 425) | def on_outdated_packages_checked(self, outdated_packages):
    method compare_dependencies (line 459) | def compare_dependencies(self, package_name):
    method on_worker_error (line 488) | def on_worker_error(self, error_message):
    method open_context_menu (line 493) | def open_context_menu(self, position: QPoint):
    method show_reverse_dependencies (line 512) | def show_reverse_dependencies(self, package_name):
    method fetch_versions (line 527) | def fetch_versions(self, package_name, position):
    method on_versions_fetched (line 544) | def on_versions_fetched(self, versions):
    method show_versions_menu (line 547) | def show_versions_menu(self, package_name, versions, position):
    method upgrade_downgrade_package (line 560) | def upgrade_downgrade_package(self, package_name, selected_version):
    method on_pip_finished (line 591) | def on_pip_finished(self, output):
    method on_pip_error (line 617) | def on_pip_error(self, error_message):
    method on_latest_version_fetched (line 622) | def on_latest_version_fetched(self, latest_version):
    method on_latest_version_error (line 625) | def on_latest_version_error(self, error_message):
    method update_outdated_after_upgrade (line 628) | def update_outdated_after_upgrade(self, package_name, latest_version):
    method find_row (line 647) | def find_row(self, package_name):
    method remove_package_from_table (line 654) | def remove_package_from_table(self, package_name):
    method on_versions_error (line 659) | def on_versions_error(self, error_message):
    method show_package_info (line 662) | def show_package_info(self, package_name):
    method export_requirements (line 711) | def export_requirements(self):
    method import_requirements (line 729) | def import_requirements(self):
    method on_import_finished (line 753) | def on_import_finished(self, output):
    method on_import_error (line 759) | def on_import_error(self, error_message):
    method show_message (line 764) | def show_message(self, title, message):
    method set_tooltip_for_package (line 772) | def set_tooltip_for_package(self, row, package_name):
  function main (line 781) | def main():

FILE: tools/chunk_userguide.py
  class MarkdownChunker (line 10) | class MarkdownChunker(QMainWindow):
    method __init__ (line 11) | def __init__(self):
    method setup_ui (line 24) | def setup_ui(self):
    method select_file (line 88) | def select_file(self):
    method create_output_directory (line 101) | def create_output_directory(self):
    method extract_chunks (line 122) | def extract_chunks(self, content):
    method save_chunks (line 149) | def save_chunks(self, chunks, output_dir):
    method analyze_chunks (line 159) | def analyze_chunks(self, chunks):
    method create_master_questions (line 179) | def create_master_questions(self, chunks):
    method update_display (line 197) | def update_display(self, num_chunks):
    method process_file (line 206) | def process_file(self):
  function main (line 239) | def main():

FILE: tools/replace_sourcecode.py
  class DependencyUpdater (line 9) | class DependencyUpdater:
    method __init__ (line 10) | def __init__(self):
    method get_site_packages_path (line 13) | def get_site_packages_path(self):
    method find_dependency_path (line 18) | def find_dependency_path(self, dependency_path_segments):
    method hash_file (line 30) | def hash_file(filepath):
    method copy_and_overwrite_if_necessary (line 38) | def copy_and_overwrite_if_necessary(source_path, target_path):
    method update_file_in_dependency (line 45) | def update_file_in_dependency(self, source_folder, file_name, dependen...
    method print_status (line 69) | def print_status(status, message):
    method print_ascii_table (line 80) | def print_ascii_table(title, rows):
  function replace_sentence_transformer_file (line 90) | def replace_sentence_transformer_file():
  function replace_chattts_file (line 94) | def replace_chattts_file():
  function add_cuda_files (line 98) | def add_cuda_files():
  function setup_vector_db (line 142) | def setup_vector_db():
  function check_embedding_model_dimensions (line 193) | def check_embedding_model_dimensions():

Download .json

Condensed preview — 90 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,362K chars).

[
  {
    "path": ".gitignore",
    "chars": 486,
    "preview": "# Virtual environment\nLib/\nScripts/\nInclude/\npyvenv.cfg\n\n# Python\n__pycache__/\n*.pyc\n*.pyo\n\n# Models (large binary files"
  },
  {
    "path": "Assets/SentenceTransformer.py",
    "chars": 95255,
    "preview": "# modified 4.1.0 to modify \"_text_length\" method and add debugging\r\nfrom __future__ import annotations\r\n\r\nimport copy\r\ni"
  },
  {
    "path": "Assets/core.py",
    "chars": 27690,
    "preview": "# custom code compatible with chattts 0.2.4\r\n# adds the \"local_dir\" parameter\r\n\r\nimport os\r\nimport re\r\nimport logging\r\ni"
  },
  {
    "path": "Assets/user_manual_consolidated.md",
    "chars": 69791,
    "preview": "### What is the VectorDB-Plugin and what can it do?\r\nVectorDB-Plugin is a program that lets you build a vector database "
  },
  {
    "path": "Assets/vision_model_table.html",
    "chars": 3552,
    "preview": "<!DOCTYPE html>\r\n<html lang=\"en\">\r\n<head>\r\n    <meta charset=\"UTF-8\">\r\n    <title>Vision Model Table</title>\r\n    <style"
  },
  {
    "path": "CSS/template.css",
    "chars": 4903,
    "preview": "DocQA_GUI {\n  background-color: $bg_window;\n}\nQWidget {\n  border: none;\n}\nQPushButton {\n  background-color: $bg_control;"
  },
  {
    "path": "README.md",
    "chars": 5292,
    "preview": "<div align=\"center\">\n\n<img width=\"1536\" height=\"248\" alt=\"splash\" src=\"https://github.com/user-attachments/assets/8ecfa8"
  },
  {
    "path": "Tokenizer/special_tokens_map.json",
    "chars": 72,
    "preview": "{\n  \"bos_token\": \"<s>\",\n  \"eos_token\": \"</s>\",\n  \"unk_token\": \"<unk>\"\n}\n"
  },
  {
    "path": "Tokenizer/tokenizer.json",
    "chars": 1671106,
    "preview": "{\n  \"version\": \"1.0\",\n  \"truncation\": null,\n  \"padding\": null,\n  \"added_tokens\": [\n    {\n      \"id\": 0,\n      \"content\":"
  },
  {
    "path": "Tokenizer/tokenizer_config.json",
    "chars": 1466,
    "preview": "{\n  \"add_bos_token\": true,\n  \"add_eos_token\": false,\n  \"added_tokens_decoder\": {\n    \"0\": {\n      \"content\": \"<unk>\",\n  "
  },
  {
    "path": "__main__.py",
    "chars": 237,
    "preview": "import faulthandler\nfaulthandler.enable(all_threads=True)\n\nimport multiprocessing\nmultiprocessing.set_start_method('spaw"
  },
  {
    "path": "charts/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "charts/all_gpus.py",
    "chars": 2959,
    "preview": "import matplotlib.pyplot as plt\r\nimport numpy as np\r\nfrom matplotlib.colors import LinearSegmentedColormap\r\nfrom charts."
  },
  {
    "path": "charts/gpu_info.py",
    "chars": 200823,
    "preview": "# Auto-generated GPU info module\nfrom typing import TypedDict, Dict\nfrom datetime import date\n\nclass GPUInfo(TypedDict):"
  },
  {
    "path": "charts/models_chat.py",
    "chars": 4118,
    "preview": "import matplotlib.pyplot as plt\r\nfrom matplotlib.patches import Patch\r\nfrom matplotlib.colors import LinearSegmentedColo"
  },
  {
    "path": "charts/models_vector.py",
    "chars": 2044,
    "preview": "import matplotlib.pyplot as plt\r\n\r\ndef create_vector_models_comparison_plot():\r\n    models_data = [\r\n        (\"bge-small"
  },
  {
    "path": "charts/models_vision.py",
    "chars": 3987,
    "preview": "import matplotlib.pyplot as plt\r\nimport pandas as pd\r\nfrom matplotlib.colors import LinearSegmentedColormap\r\nfrom matplo"
  },
  {
    "path": "chat/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "chat/base.py",
    "chars": 21286,
    "preview": "import yaml\r\nimport logging\r\nimport gc\r\nfrom copy import deepcopy\r\nimport functools\r\nimport copy\r\nfrom pathlib import Pa"
  },
  {
    "path": "chat/jeeves.py",
    "chars": 21029,
    "preview": "import sys\r\nimport os\r\nos.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'\r\nfrom pathlib import Path\r\n\r\nfrom core.utilities impo"
  },
  {
    "path": "chat/kobold.py",
    "chars": 4528,
    "preview": "import json\r\nimport logging\r\nimport requests\r\nimport sseclient\r\nfrom PySide6.QtCore import QThread, Signal\r\n\r\nfrom db.da"
  },
  {
    "path": "chat/lm_studio.py",
    "chars": 8275,
    "preview": "import logging\r\nimport re\r\n\r\nimport requests\r\nfrom openai import OpenAI\r\nfrom PySide6.QtCore import QThread\r\n\r\nfrom db.d"
  },
  {
    "path": "chat/local_model.py",
    "chars": 11664,
    "preview": "import time\r\nimport logging\r\nfrom enum import Enum, auto\r\nfrom typing import Any, Optional\r\nfrom dataclasses import data"
  },
  {
    "path": "chat/minimax.py",
    "chars": 4688,
    "preview": "import logging\nfrom openai import OpenAI\nfrom PySide6.QtCore import QThread, Signal\n\nfrom db.database_interactions impor"
  },
  {
    "path": "chat/openai.py",
    "chars": 5202,
    "preview": "import logging\nfrom openai import OpenAI\nfrom PySide6.QtCore import QThread, Signal\n\nfrom db.database_interactions impor"
  },
  {
    "path": "core/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "core/chatgpt_settings.py",
    "chars": 1464,
    "preview": "AVAILABLE_OPENAI_MODELS = [\n    \"gpt-5.5\",\n    \"gpt-5.4\",\n    \"gpt-5.4-mini\",\n]\n\nMODEL_DISPLAY_NAMES = {\n    \"gpt-5.5\": "
  },
  {
    "path": "core/config.py",
    "chars": 7406,
    "preview": "from pathlib import Path\nfrom typing import Optional, Dict, Any, Literal\nfrom pydantic import BaseModel, Field, field_va"
  },
  {
    "path": "core/constants.py",
    "chars": 117838,
    "preview": "\r\nimport os\r\nfrom pathlib import Path\r\n\r\nPROJECT_ROOT = Path(__file__).resolve().parent.parent\r\n\r\n_cpu = os.cpu_count() "
  },
  {
    "path": "core/extract_metadata.py",
    "chars": 3525,
    "preview": "import os\r\nimport datetime\r\nimport hashlib\r\nimport re\r\nfrom db.document_processor import Document\r\nfrom typing import Li"
  },
  {
    "path": "core/initialize.py",
    "chars": 3883,
    "preview": "import platform\r\nimport shutil\r\nfrom pathlib import Path\r\nimport logging\r\n\r\nimport torch\r\nimport yaml\r\nimport ctranslate"
  },
  {
    "path": "core/utilities.py",
    "chars": 34081,
    "preview": "import importlib\r\nimport importlib.metadata\r\nimport importlib.util\r\nimport os\r\nimport threading\r\nimport logging\r\nimport "
  },
  {
    "path": "db/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "db/choose_documents.py",
    "chars": 11460,
    "preview": "from pathlib import Path\r\nfrom multiprocessing import Pool, cpu_count\r\n\r\nimport yaml\r\nfrom PySide6.QtCore import QElapse"
  },
  {
    "path": "db/create_symlinks.py",
    "chars": 3170,
    "preview": "import hashlib\r\nfrom multiprocessing import Pool, cpu_count\r\nfrom pathlib import Path\r\nfrom typing import Union, List, T"
  },
  {
    "path": "db/cuda_manager.py",
    "chars": 1931,
    "preview": "import threading\nimport logging\nimport torch\nfrom contextlib import contextmanager\n\nlogger = logging.getLogger(__name__)"
  },
  {
    "path": "db/database_interactions.py",
    "chars": 32268,
    "preview": "import faulthandler\nfaulthandler.enable()\n\n# Module-level TileDB DLL preload. Mirrors the approach in VectorDB-Light's\n#"
  },
  {
    "path": "db/document_processor.py",
    "chars": 17267,
    "preview": "import os\nimport csv\nimport logging\nimport warnings\nimport datetime\nimport hashlib\nimport re\nfrom pathlib import Path\nfr"
  },
  {
    "path": "db/embedding_models.py",
    "chars": 18503,
    "preview": "import gc\nimport logging\nimport os\nimport pickle\nimport subprocess\nimport sys\nimport tempfile\nimport time\nimport unicode"
  },
  {
    "path": "db/process_manager.py",
    "chars": 3155,
    "preview": "import logging\nimport threading\nimport multiprocessing\nfrom typing import List\n\nlogger = logging.getLogger(__name__)\n\n\nc"
  },
  {
    "path": "db/sqlite_operations.py",
    "chars": 1411,
    "preview": "import sqlite3\nfrom pathlib import Path\n\n\ndef create_metadata_db(persist_directory, documents, hash_id_mappings):\n    if"
  },
  {
    "path": "db/stage_extract.py",
    "chars": 1440,
    "preview": "import logging\nimport os\nimport pickle\nimport sys\nimport time\nfrom pathlib import Path\n\n# Ensure project root is on sys."
  },
  {
    "path": "db/stage_split.py",
    "chars": 14708,
    "preview": "import argparse\nimport concurrent.futures\nimport gc\nimport logging\nimport os\nimport pickle\nimport subprocess\nimport sys\n"
  },
  {
    "path": "db/stage_tokenize.py",
    "chars": 20339,
    "preview": "import argparse\nimport concurrent.futures\nimport gc\nimport logging\nimport os\nimport pickle\nimport subprocess\nimport sys\n"
  },
  {
    "path": "gui/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gui/credentials.py",
    "chars": 5653,
    "preview": "from pathlib import Path\r\nfrom PySide6.QtWidgets import (QDialog, QDialogButtonBox, QVBoxLayout,\r\n                      "
  },
  {
    "path": "gui/dialogs/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gui/dialogs/ai_backends_dialog.py",
    "chars": 2882,
    "preview": "from pathlib import Path\n\nimport yaml\nfrom PySide6.QtWidgets import (\n    QDialog,\n    QVBoxLayout,\n    QHBoxLayout,\n   "
  },
  {
    "path": "gui/dialogs/chatgpt_tab.py",
    "chars": 7974,
    "preview": "from PySide6.QtWidgets import (\n    QWidget,\n    QVBoxLayout,\n    QHBoxLayout,\n    QLabel,\n    QComboBox,\n    QPushButto"
  },
  {
    "path": "gui/dialogs/kobold_tab.py",
    "chars": 868,
    "preview": "from PySide6.QtCore import Qt\nfrom PySide6.QtWidgets import QWidget, QVBoxLayout, QLabel\n\n\nclass KoboldTab(QWidget):\n   "
  },
  {
    "path": "gui/dialogs/lm_studio_tab.py",
    "chars": 3584,
    "preview": "import re\n\nfrom PySide6.QtGui import QIntValidator\nfrom PySide6.QtWidgets import (\n    QWidget,\n    QVBoxLayout,\n    QHB"
  },
  {
    "path": "gui/dialogs/minimax_tab.py",
    "chars": 913,
    "preview": "from PySide6.QtCore import Qt\nfrom PySide6.QtWidgets import QWidget, QVBoxLayout, QLabel\n\n\nclass MiniMaxTab(QWidget):\n  "
  },
  {
    "path": "gui/download_model.py",
    "chars": 9895,
    "preview": "from pathlib import Path\r\nfrom huggingface_hub import snapshot_download, HfApi\r\nfrom huggingface_hub.utils import disabl"
  },
  {
    "path": "gui/main_window.py",
    "chars": 7174,
    "preview": "import sys\n\nfrom ctypes import windll, byref, sizeof, c_int\nfrom ctypes.wintypes import BOOL, HWND, DWORD\n\nfrom PySide6."
  },
  {
    "path": "gui/metrics_bar.py",
    "chars": 25839,
    "preview": "from dataclasses import dataclass\r\nfrom collections import deque\r\nfrom datetime import datetime\r\nfrom enum import IntEnu"
  },
  {
    "path": "gui/tabs.py",
    "chars": 986,
    "preview": "from PySide6.QtWidgets import QTabWidget\r\nfrom gui.tabs_settings.settings import GuiSettingsTab\r\nfrom gui.tabs_tools.too"
  },
  {
    "path": "gui/tabs_databases/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gui/tabs_databases/create.py",
    "chars": 21795,
    "preview": "import os\r\nimport sys\r\nimport time\r\nimport gc\r\nimport json\r\nimport shutil\r\nimport subprocess\r\nfrom pathlib import Path\r\n"
  },
  {
    "path": "gui/tabs_databases/manage.py",
    "chars": 11740,
    "preview": "import shutil\r\nimport sqlite3\r\nfrom pathlib import Path\r\n\r\nimport yaml\r\nfrom PySide6.QtCore import Qt, QAbstractTableMod"
  },
  {
    "path": "gui/tabs_databases/query.py",
    "chars": 27361,
    "preview": "import logging\r\nimport queue\r\nimport threading\r\nfrom pathlib import Path\r\nimport multiprocessing\r\nimport re\r\nimport html"
  },
  {
    "path": "gui/tabs_models/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gui/tabs_models/models.py",
    "chars": 9392,
    "preview": "import threading\r\nfrom pathlib import Path\r\n\r\nfrom PySide6.QtCore import Qt, QUrl\r\nfrom PySide6.QtGui import QDesktopSer"
  },
  {
    "path": "gui/tabs_settings/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gui/tabs_settings/database_create.py",
    "chars": 9595,
    "preview": "import yaml\r\nfrom PySide6.QtGui import QIntValidator\r\nfrom PySide6.QtWidgets import QWidget, QLabel, QLineEdit, QGridLay"
  },
  {
    "path": "gui/tabs_settings/database_query.py",
    "chars": 11830,
    "preview": "import yaml\r\nfrom PySide6.QtGui import QIntValidator, QDoubleValidator\r\nfrom PySide6.QtWidgets import (\r\n    QWidget,\r\n "
  },
  {
    "path": "gui/tabs_settings/settings.py",
    "chars": 3981,
    "preview": "import logging\r\nfrom functools import partial\r\nfrom PySide6.QtWidgets import (\r\n    QVBoxLayout,\r\n    QGroupBox,\r\n    QP"
  },
  {
    "path": "gui/tabs_settings/tts.py",
    "chars": 13691,
    "preview": "import yaml\r\nfrom pathlib import Path\r\nfrom PySide6.QtCore import Qt\r\nfrom PySide6.QtWidgets import (\r\n    QLabel, QComb"
  },
  {
    "path": "gui/tabs_settings/vision.py",
    "chars": 3867,
    "preview": "import yaml\r\nfrom pathlib import Path\r\nimport torch\r\nfrom PySide6.QtCore import Qt\r\nfrom PySide6.QtWidgets import QLabel"
  },
  {
    "path": "gui/tabs_tools/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gui/tabs_tools/misc.py",
    "chars": 10174,
    "preview": "from PySide6.QtWidgets import QVBoxLayout, QHBoxLayout, QPushButton, QWidget, QMessageBox, QSpinBox\r\nfrom PySide6.QtCore"
  },
  {
    "path": "gui/tabs_tools/ocr.py",
    "chars": 6617,
    "preview": "import time\r\nfrom pathlib import Path\r\nimport fitz\r\nfrom PySide6.QtWidgets import (\r\n    QWidget, QHBoxLayout, QVBoxLayo"
  },
  {
    "path": "gui/tabs_tools/scrape.py",
    "chars": 14651,
    "preview": "import os\nimport platform\nimport shutil\nimport subprocess\n\nfrom PySide6.QtCore import Qt, QThread, QSettings\nfrom PySide"
  },
  {
    "path": "gui/tabs_tools/tools.py",
    "chars": 2276,
    "preview": "from PySide6.QtWidgets import QVBoxLayout, QGroupBox, QWidget\r\nfrom PySide6.QtCore import QThread, Signal\r\nfrom gui.tabs"
  },
  {
    "path": "gui/tabs_tools/transcribe.py",
    "chars": 5401,
    "preview": "import threading\r\nfrom pathlib import Path\r\nimport yaml\r\nimport torch\r\nfrom PySide6.QtCore import Qt\r\nfrom PySide6.QtWid"
  },
  {
    "path": "gui/tabs_tools/vision.py",
    "chars": 19556,
    "preview": "import sys\r\nimport textwrap\r\nimport subprocess\r\nfrom pathlib import Path\r\nimport logging\r\nimport yaml\r\nimport tempfile\r\n"
  },
  {
    "path": "gui.py",
    "chars": 282,
    "preview": "import faulthandler\r\nfaulthandler.enable(all_threads=True)\r\n\r\nimport multiprocessing\r\nmultiprocessing.set_start_method('"
  },
  {
    "path": "modules/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "modules/kokoro.py",
    "chars": 7407,
    "preview": "import sys\r\nimport os\r\nfrom pathlib import Path\r\nimport queue\r\nimport threading\r\nimport re\r\nimport torch\r\nimport soundde"
  },
  {
    "path": "modules/ocr.py",
    "chars": 11971,
    "preview": "import os\r\nimport io\r\nimport tempfile\r\nimport threading\r\nimport queue\r\nimport time\r\nfrom pathlib import Path\r\nfrom io im"
  },
  {
    "path": "modules/process_images.py",
    "chars": 25170,
    "preview": "import os\r\nimport traceback\r\nimport inspect\r\nimport time\r\nimport warnings\r\nfrom concurrent.futures import ProcessPoolExe"
  },
  {
    "path": "modules/scraper.py",
    "chars": 29725,
    "preview": "import os\r\nimport re\r\nimport json\r\nimport asyncio\r\nimport textwrap\r\nimport aiofiles\r\nimport markdown\r\nfrom bs4 import Be"
  },
  {
    "path": "modules/transcribe.py",
    "chars": 7409,
    "preview": "from multiprocessing import Process\r\nfrom pathlib import Path\r\nimport warnings\r\nimport shutil\r\nimport json\r\n\r\nimport tor"
  },
  {
    "path": "modules/tts.py",
    "chars": 30975,
    "preview": "import queue\r\nimport re\r\nimport threading\r\nfrom pathlib import Path\r\n\r\nimport io\r\nimport numpy as np\r\nimport sounddevice"
  },
  {
    "path": "modules/voice_recorder.py",
    "chars": 4487,
    "preview": "import tempfile\r\nfrom pathlib import Path\r\n\r\nimport psutil\r\nimport sounddevice as sd\r\nimport numpy as np\r\nimport soundfi"
  },
  {
    "path": "setup_windows.py",
    "chars": 12281,
    "preview": "import os\r\nimport subprocess\r\nimport sys\r\n\r\ncache_dir = os.path.join(\r\n    os.environ.get(\"USERPROFILE\", os.path.expandu"
  },
  {
    "path": "tools/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tools/check_packages.py",
    "chars": 38179,
    "preview": "import importlib.util\r\nimport importlib.metadata\r\nimport sys\r\nimport urllib.request\r\nimport json\r\nimport subprocess\r\nfro"
  },
  {
    "path": "tools/chunk_userguide.py",
    "chars": 8730,
    "preview": "import sys\r\nimport os\r\nimport shutil\r\nfrom PySide6.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout"
  },
  {
    "path": "tools/replace_sourcecode.py",
    "chars": 9251,
    "preview": "import hashlib\r\nfrom pathlib import Path\r\nimport shutil\r\nimport sys\r\nimport zipfile\r\n\r\nfrom core.constants import PROJEC"
  }
]

// ... and 1 more files (download for full content)

About this extraction

This page contains the full source code of the BBC-Esq/ChromaDB-Plugin-for-LM-Studio GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 90 files (2.8 MB), approximately 725.8k tokens, and a symbol index with 1108 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo