SYMBOL INDEX (187 symbols across 29 files)

FILE: buster/busterbot.py
  class BusterConfig (line 17) | class BusterConfig:
  class Buster (line 83) | class Buster:
    method __init__ (line 84) | def __init__(
    method process_input (line 96) | def process_input(

FILE: buster/completers/base.py
  class Completion (line 18) | class Completion:
    method __init__ (line 45) | def __init__(
    method __repr__ (line 67) | def __repr__(self):
    method _validate_arguments (line 82) | def _validate_arguments(self, answer_generator: Optional[Iterator], an...
    method answer_relevant (line 103) | def answer_relevant(self) -> bool:
    method question_relevant (line 122) | def question_relevant(self):
    method answer_text (line 127) | def answer_text(self):
    method answer_text (line 134) | def answer_text(self, value: str) -> None:
    method answer_generator (line 138) | def answer_generator(self):
    method answer_generator (line 148) | def answer_generator(self, generator: Iterator) -> None:
    method postprocess (line 151) | def postprocess(self):
    method to_json (line 174) | def to_json(self, columns_to_ignore: Optional[list[str]] = None) -> Any:
    method from_dict (line 211) | def from_dict(cls, completion_dict: dict):
  class Completer (line 230) | class Completer(ABC):
    method complete (line 239) | def complete(self, prompt: str, user_input) -> (str | Iterator, bool):
  class DocumentAnswerer (line 244) | class DocumentAnswerer:
    method __init__ (line 262) | def __init__(
    method prepare_prompt (line 276) | def prepare_prompt(self, matched_documents) -> str:
    method get_completion (line 288) | def get_completion(

FILE: buster/completers/chatgpt.py
  class ChatGPTCompleter (line 29) | class ChatGPTCompleter(Completer):
    method __init__ (line 30) | def __init__(self, completion_kwargs: dict, client_kwargs: Optional[di...
    method complete (line 45) | def complete(self, prompt: str, user_input: str, completion_kwargs=Non...

FILE: buster/completers/user_inputs.py
  class UserInputs (line 6) | class UserInputs:
    method current_input (line 18) | def current_input(self):

FILE: buster/documents_manager/base.py
  class DocumentsManager (line 20) | class DocumentsManager(ABC):
    method __init__ (line 21) | def __init__(self, required_columns: Optional[list[str]] = None):
    method _check_required_columns (line 32) | def _check_required_columns(self, df: pd.DataFrame):
    method _checkpoint_csv (line 37) | def _checkpoint_csv(self, df, csv_filename: str, csv_overwrite: bool =...
    method add (line 63) | def add(
    method batch_add (line 107) | def batch_add(
    method _add_documents (line 180) | def _add_documents(self, df: pd.DataFrame, **add_kwargs):

FILE: buster/documents_manager/deeplake.py
  class DeepLakeDocumentsManager (line 14) | class DeepLakeDocumentsManager(DocumentsManager):
    method __init__ (line 15) | def __init__(
    method __len__ (line 37) | def __len__(self):
    method _extract_metadata (line 46) | def _extract_metadata(cls, df: pd.DataFrame) -> dict:
    method _add_documents (line 66) | def _add_documents(self, df: pd.DataFrame, **add_kwargs):
    method to_zip (line 94) | def to_zip(self, output_path: str = "."):

FILE: buster/documents_manager/service.py
  class DocumentsService (line 14) | class DocumentsService(DocumentsManager):
    method __init__ (line 17) | def __init__(
    method __repr__ (line 48) | def __repr__(self):
    method get_source_id (line 52) | def get_source_id(self, source: str) -> str:
    method _add_documents (line 63) | def _add_documents(self, df: pd.DataFrame):
    method update_source (line 106) | def update_source(self, source: str, display_name: str = None, note: s...
    method delete_source (line 118) | def delete_source(self, source: str) -> tuple[int, int]:
    method drop_db (line 138) | def drop_db(self):

FILE: buster/examples/cfg.py
  function setup_buster (line 114) | def setup_buster(buster_cfg: BusterConfig):

FILE: buster/examples/generate_embeddings.py
  function main (line 13) | def main(csv):

FILE: buster/examples/gradio_app.py
  function add_user_question (line 24) | def add_user_question(user_question: str, chat_history: Optional[ChatHis...
  function format_sources (line 35) | def format_sources(matched_documents: pd.DataFrame) -> str:
  function add_sources (line 57) | def add_sources(history, completion):
  function chat (line 65) | def chat(chat_history: ChatHistory) -> Tuple[ChatHistory, Completion]:

FILE: buster/formatters/documents.py
  class DocumentsFormatter (line 13) | class DocumentsFormatter(ABC):
    method format (line 22) | def format(self, matched_documents: pd.DataFrame) -> tuple[str, pd.Dat...
  class DocumentsFormatterHTML (line 37) | class DocumentsFormatterHTML(DocumentsFormatter):
    method format (line 55) | def format(self, matched_documents: pd.DataFrame) -> tuple[str, pd.Dat...
  class DocumentsFormatterJSON (line 103) | class DocumentsFormatterJSON(DocumentsFormatter):
    method format (line 117) | def format(self, matched_documents: pd.DataFrame) -> tuple[str, pd.Dat...

FILE: buster/formatters/prompts.py
  class PromptFormatter (line 13) | class PromptFormatter:
    method format (line 20) | def format(self, documents: str) -> str:
  function prompt_formatter_factory (line 43) | def prompt_formatter_factory(tokenizer: Tokenizer, prompt_cfg) -> Prompt...

FILE: buster/llm_utils/embeddings.py
  function get_openai_embedding_constructor (line 15) | def get_openai_embedding_constructor(client_kwargs: Optional[dict] = Non...
  function cosine_similarity (line 43) | def cosine_similarity(a, b):
  function compute_embeddings_parallelized (line 47) | def compute_embeddings_parallelized(df: pd.DataFrame, embedding_fn: call...
  class BM25 (line 70) | class BM25:
    method __init__ (line 71) | def __init__(self, path_to_params: str = None) -> None:
    method fit (line 77) | def fit(self, df: pd.DataFrame):
    method dump_params (line 80) | def dump_params(self, path: str):
    method get_sparse_embedding_fn (line 83) | def get_sparse_embedding_fn(self):

FILE: buster/llm_utils/question_reformulator.py
  class QuestionReformulator (line 7) | class QuestionReformulator:
    method __init__ (line 8) | def __init__(
    method reformulate (line 32) | def reformulate(self, user_input: str) -> str:

FILE: buster/parsers/parser.py
  class Section (line 17) | class Section:
    method __post_init__ (line 23) | def __post_init__(self, nodes: list[bs4.element.NavigableString]):
    method __len__ (line 44) | def __len__(self) -> int:
    method from_text (line 48) | def from_text(cls, text: str, url: str, name: str) -> "Section":
    method get_chunks (line 58) | def get_chunks(self, min_length: int, max_length: int) -> Iterator["Se...
  class Parser (line 76) | class Parser(ABC):
    method relative_path (line 85) | def relative_path(self) -> str:
    method find_sections (line 97) | def find_sections(self) -> Iterator[Section]: ...
    method parse (line 99) | def parse(self) -> list[Section]:
  class SphinxParser (line 107) | class SphinxParser(Parser):
    method find_sections (line 108) | def find_sections(self) -> Iterator[Section]:
    method build_url (line 125) | def build_url(self, suffix: str) -> str:
  class HuggingfaceParser (line 129) | class HuggingfaceParser(Parser):
    method find_sections (line 130) | def find_sections(self) -> Iterator[Section]:
    method build_url (line 142) | def build_url(self, suffix: str) -> str:
  function get_document (line 146) | def get_document(
  function get_all_documents (line 179) | def get_all_documents(

FILE: buster/retriever/base.py
  class Retriever (line 19) | class Retriever(ABC):
    method __init__ (line 20) | def __init__(
    method get_documents (line 49) | def get_documents(self, source: Optional[str] = None) -> pd.DataFrame:
    method get_source_display_name (line 61) | def get_source_display_name(self, source: str) -> str:
    method get_topk_documents (line 75) | def get_topk_documents(self, query: str, source: Optional[str] = None,...
    method threshold_documents (line 90) | def threshold_documents(self, matched_documents: pd.DataFrame, thresh:...
    method retrieve (line 103) | def retrieve(

FILE: buster/retriever/deeplake.py
  function extract_metadata (line 14) | def extract_metadata(x: pd.DataFrame, columns) -> pd.DataFrame:
  function data_dict_to_df (line 29) | def data_dict_to_df(data: dict) -> pd.DataFrame:
  function build_tql_query (line 54) | def build_tql_query(embedding, sources=None, top_k: int = 3) -> str:
  class DeepLakeRetriever (line 86) | class DeepLakeRetriever(Retriever):
    method __init__ (line 87) | def __init__(
    method get_documents (line 117) | def get_documents(self, sources: Optional[list[str]] = None) -> pd.Dat...
    method get_source_display_name (line 135) | def get_source_display_name(self, source: str) -> str:
    method get_topk_documents (line 149) | def get_topk_documents(

FILE: buster/retriever/service.py
  class ServiceRetriever (line 17) | class ServiceRetriever(Retriever):
    method __init__ (line 18) | def __init__(
    method get_source_id (line 53) | def get_source_id(self, source: str) -> str:
    method get_documents (line 65) | def get_documents(self, source: Optional[str] = None) -> pd.DataFrame:
    method get_source_display_name (line 88) | def get_source_display_name(self, source: str) -> str:
    method get_topk_documents (line 103) | def get_topk_documents(self, query: str, sources: Optional[List[str]],...

FILE: buster/tokenizers/__init__.py
  function tokenizer_factory (line 5) | def tokenizer_factory(tokenizer_cfg: dict) -> Tokenizer:

FILE: buster/tokenizers/base.py
  class Tokenizer (line 5) | class Tokenizer(ABC):
    method __init__ (line 16) | def __init__(self, model_name: str):
    method encode (line 20) | def encode(self, string: str) -> list[int]:
    method decode (line 34) | def decode(self, encoded: list[int]) -> str:
    method num_tokens (line 47) | def num_tokens(self, string: str, return_encoded: bool = False) -> Uni...

FILE: buster/tokenizers/gpt.py
  class GPTTokenizer (line 6) | class GPTTokenizer(Tokenizer):
    method __init__ (line 19) | def __init__(self, model_name: str):
    method encode (line 23) | def encode(self, string: str):
    method decode (line 35) | def decode(self, encoded: list[int]):

FILE: buster/utils.py
  function get_file_extension (line 6) | def get_file_extension(filepath: str) -> str:
  function download_db (line 10) | def download_db(db_url: str, output_dir: str):
  function zip_contents (line 22) | def zip_contents(input_path, output_path):
  function extract_zip (line 51) | def extract_zip(zip_file_path, output_path):

FILE: buster/validators/base.py
  class Validator (line 16) | class Validator:
    method __init__ (line 17) | def __init__(
    method check_question_relevance (line 49) | def check_question_relevance(self, question: str) -> tuple[bool, str]:
    method check_answer_relevance (line 61) | def check_answer_relevance(self, answer: str) -> bool:
    method check_documents_relevance (line 73) | def check_documents_relevance(self, answer: str, matched_documents: pd...
    method rerank_docs (line 86) | def rerank_docs(

FILE: buster/validators/validators.py
  class QuestionValidator (line 16) | class QuestionValidator:
    method __init__ (line 17) | def __init__(
    method check_question_relevance (line 56) | def check_question_relevance(self, question: str) -> tuple[bool, str]:
  class AnswerValidator (line 74) | class AnswerValidator:
    method __init__ (line 75) | def __init__(
    method check_answer_relevance (line 96) | def check_answer_relevance(self, answer: str) -> bool:
  class DocumentsValidator (line 114) | class DocumentsValidator:
    method __init__ (line 115) | def __init__(
    method check_document_relevance (line 150) | def check_document_relevance(self, answer: str, document: str) -> bool:
    method check_documents_relevance (line 163) | def check_documents_relevance(self, answer: str, matched_documents: pd...

FILE: tests/test_chatbot.py
  function get_fake_embedding (line 90) | def get_fake_embedding(length=1536):
  class MockAnswerer (line 95) | class MockAnswerer(Completer):
    method __init__ (line 96) | def __init__(self, expected_answer):
    method prepare_prompt (line 99) | def prepare_prompt(self, user_inputs, matched_documents):
    method complete (line 102) | def complete(self):
    method get_completion (line 105) | def get_completion(self, user_inputs, matched_documents, validator, *a...
  class MockRetriever (line 115) | class MockRetriever(Retriever):
    method __init__ (line 116) | def __init__(self, **kwargs):
    method get_documents (line 136) | def get_documents(self, source):
    method get_topk_documents (line 139) | def get_topk_documents(self, query: str, sources: list[str] = None, to...
    method get_source_display_name (line 145) | def get_source_display_name(self, source):
  class MockValidator (line 149) | class MockValidator:
    method __init__ (line 150) | def __init__(self, *args, **kwargs):
    method validate (line 153) | def validate(self, completion):
    method check_question_relevance (line 157) | def check_question_relevance(self, *args, **kwargs):
    method check_answer_relevance (line 160) | def check_answer_relevance(self, *args, **kwargs):
  function vector_store_path (line 165) | def vector_store_path(tmp_path_factory):
  function test_chatbot_mock_data (line 176) | def test_chatbot_mock_data(tmp_path, monkeypatch):
  function test_chatbot_real_data__chatGPT (line 196) | def test_chatbot_real_data__chatGPT(vector_store_path):
  function test_chatbot_real_data__chatGPT_OOD (line 219) | def test_chatbot_real_data__chatGPT_OOD(vector_store_path):
  function test_chatbot_real_data__no_docs_found (line 256) | def test_chatbot_real_data__no_docs_found(vector_store_path):

FILE: tests/test_documents.py
  function get_fake_embedding (line 17) | def get_fake_embedding(*arg, **kwargs):
  function test_write_read (line 25) | def test_write_read(tmp_path, documents_manager, retriever):
  function test_write_write_read (line 64) | def test_write_write_read(tmp_path, documents_manager, retriever):
  function test_generate_embeddings (line 109) | def test_generate_embeddings(tmp_path, monkeypatch):
  function test_generate_embeddings_parallelized (line 137) | def test_generate_embeddings_parallelized():
  function test_add_batches (line 161) | def test_add_batches(tmp_path):

FILE: tests/test_formatters.py
  function test_DocumentsDormatterHTML__simple (line 11) | def test_DocumentsDormatterHTML__simple():
  function test_DocumentsDormatterJSON__simple (line 41) | def test_DocumentsDormatterJSON__simple():
  function test_DocumentsFormatterHTML__doc_to_long (line 78) | def test_DocumentsFormatterHTML__doc_to_long():
  function test_DocumentsFormatterJSON__doc_too_long (line 109) | def test_DocumentsFormatterJSON__doc_too_long():
  function test_DocumentsFormatterHTML__doc_to_long_2 (line 156) | def test_DocumentsFormatterHTML__doc_to_long_2():
  function test_DocumentsFormatterHTML__complex_format (line 185) | def test_DocumentsFormatterHTML__complex_format():
  function test_system_prompt_formatter (line 230) | def test_system_prompt_formatter():
  function test_system_prompt_formatter__to_long (line 249) | def test_system_prompt_formatter__to_long():

FILE: tests/test_read_write.py
  class MockValidator (line 6) | class MockValidator:
    method __init__ (line 7) | def __init__(self):
    method check_answer_relevance (line 10) | def check_answer_relevance(self, completion: Completion) -> bool:
    method rerank_docs (line 13) | def rerank_docs(self, answer: str, matched_documents: pd.DataFrame) ->...
  function test_read_write_completion (line 17) | def test_read_write_completion():

FILE: tests/test_validator.py
  function test_validator_check_question_relevance (line 28) | def test_validator_check_question_relevance():
  function test_validator_check_answer_relevance (line 38) | def test_validator_check_answer_relevance():
  function test_validator_check_documents_relevance (line 46) | def test_validator_check_documents_relevance():
  function test_validator_rerank_docs (line 65) | def test_validator_rerank_docs():