SYMBOL INDEX (187 symbols across 29 files) FILE: buster/busterbot.py class BusterConfig (line 17) | class BusterConfig: class Buster (line 83) | class Buster: method __init__ (line 84) | def __init__( method process_input (line 96) | def process_input( FILE: buster/completers/base.py class Completion (line 18) | class Completion: method __init__ (line 45) | def __init__( method __repr__ (line 67) | def __repr__(self): method _validate_arguments (line 82) | def _validate_arguments(self, answer_generator: Optional[Iterator], an... method answer_relevant (line 103) | def answer_relevant(self) -> bool: method question_relevant (line 122) | def question_relevant(self): method answer_text (line 127) | def answer_text(self): method answer_text (line 134) | def answer_text(self, value: str) -> None: method answer_generator (line 138) | def answer_generator(self): method answer_generator (line 148) | def answer_generator(self, generator: Iterator) -> None: method postprocess (line 151) | def postprocess(self): method to_json (line 174) | def to_json(self, columns_to_ignore: Optional[list[str]] = None) -> Any: method from_dict (line 211) | def from_dict(cls, completion_dict: dict): class Completer (line 230) | class Completer(ABC): method complete (line 239) | def complete(self, prompt: str, user_input) -> (str | Iterator, bool): class DocumentAnswerer (line 244) | class DocumentAnswerer: method __init__ (line 262) | def __init__( method prepare_prompt (line 276) | def prepare_prompt(self, matched_documents) -> str: method get_completion (line 288) | def get_completion( FILE: buster/completers/chatgpt.py class ChatGPTCompleter (line 29) | class ChatGPTCompleter(Completer): method __init__ (line 30) | def __init__(self, completion_kwargs: dict, client_kwargs: Optional[di... method complete (line 45) | def complete(self, prompt: str, user_input: str, completion_kwargs=Non... FILE: buster/completers/user_inputs.py class UserInputs (line 6) | class UserInputs: method current_input (line 18) | def current_input(self): FILE: buster/documents_manager/base.py class DocumentsManager (line 20) | class DocumentsManager(ABC): method __init__ (line 21) | def __init__(self, required_columns: Optional[list[str]] = None): method _check_required_columns (line 32) | def _check_required_columns(self, df: pd.DataFrame): method _checkpoint_csv (line 37) | def _checkpoint_csv(self, df, csv_filename: str, csv_overwrite: bool =... method add (line 63) | def add( method batch_add (line 107) | def batch_add( method _add_documents (line 180) | def _add_documents(self, df: pd.DataFrame, **add_kwargs): FILE: buster/documents_manager/deeplake.py class DeepLakeDocumentsManager (line 14) | class DeepLakeDocumentsManager(DocumentsManager): method __init__ (line 15) | def __init__( method __len__ (line 37) | def __len__(self): method _extract_metadata (line 46) | def _extract_metadata(cls, df: pd.DataFrame) -> dict: method _add_documents (line 66) | def _add_documents(self, df: pd.DataFrame, **add_kwargs): method to_zip (line 94) | def to_zip(self, output_path: str = "."): FILE: buster/documents_manager/service.py class DocumentsService (line 14) | class DocumentsService(DocumentsManager): method __init__ (line 17) | def __init__( method __repr__ (line 48) | def __repr__(self): method get_source_id (line 52) | def get_source_id(self, source: str) -> str: method _add_documents (line 63) | def _add_documents(self, df: pd.DataFrame): method update_source (line 106) | def update_source(self, source: str, display_name: str = None, note: s... method delete_source (line 118) | def delete_source(self, source: str) -> tuple[int, int]: method drop_db (line 138) | def drop_db(self): FILE: buster/examples/cfg.py function setup_buster (line 114) | def setup_buster(buster_cfg: BusterConfig): FILE: buster/examples/generate_embeddings.py function main (line 13) | def main(csv): FILE: buster/examples/gradio_app.py function add_user_question (line 24) | def add_user_question(user_question: str, chat_history: Optional[ChatHis... function format_sources (line 35) | def format_sources(matched_documents: pd.DataFrame) -> str: function add_sources (line 57) | def add_sources(history, completion): function chat (line 65) | def chat(chat_history: ChatHistory) -> Tuple[ChatHistory, Completion]: FILE: buster/formatters/documents.py class DocumentsFormatter (line 13) | class DocumentsFormatter(ABC): method format (line 22) | def format(self, matched_documents: pd.DataFrame) -> tuple[str, pd.Dat... class DocumentsFormatterHTML (line 37) | class DocumentsFormatterHTML(DocumentsFormatter): method format (line 55) | def format(self, matched_documents: pd.DataFrame) -> tuple[str, pd.Dat... class DocumentsFormatterJSON (line 103) | class DocumentsFormatterJSON(DocumentsFormatter): method format (line 117) | def format(self, matched_documents: pd.DataFrame) -> tuple[str, pd.Dat... FILE: buster/formatters/prompts.py class PromptFormatter (line 13) | class PromptFormatter: method format (line 20) | def format(self, documents: str) -> str: function prompt_formatter_factory (line 43) | def prompt_formatter_factory(tokenizer: Tokenizer, prompt_cfg) -> Prompt... FILE: buster/llm_utils/embeddings.py function get_openai_embedding_constructor (line 15) | def get_openai_embedding_constructor(client_kwargs: Optional[dict] = Non... function cosine_similarity (line 43) | def cosine_similarity(a, b): function compute_embeddings_parallelized (line 47) | def compute_embeddings_parallelized(df: pd.DataFrame, embedding_fn: call... class BM25 (line 70) | class BM25: method __init__ (line 71) | def __init__(self, path_to_params: str = None) -> None: method fit (line 77) | def fit(self, df: pd.DataFrame): method dump_params (line 80) | def dump_params(self, path: str): method get_sparse_embedding_fn (line 83) | def get_sparse_embedding_fn(self): FILE: buster/llm_utils/question_reformulator.py class QuestionReformulator (line 7) | class QuestionReformulator: method __init__ (line 8) | def __init__( method reformulate (line 32) | def reformulate(self, user_input: str) -> str: FILE: buster/parsers/parser.py class Section (line 17) | class Section: method __post_init__ (line 23) | def __post_init__(self, nodes: list[bs4.element.NavigableString]): method __len__ (line 44) | def __len__(self) -> int: method from_text (line 48) | def from_text(cls, text: str, url: str, name: str) -> "Section": method get_chunks (line 58) | def get_chunks(self, min_length: int, max_length: int) -> Iterator["Se... class Parser (line 76) | class Parser(ABC): method relative_path (line 85) | def relative_path(self) -> str: method find_sections (line 97) | def find_sections(self) -> Iterator[Section]: ... method parse (line 99) | def parse(self) -> list[Section]: class SphinxParser (line 107) | class SphinxParser(Parser): method find_sections (line 108) | def find_sections(self) -> Iterator[Section]: method build_url (line 125) | def build_url(self, suffix: str) -> str: class HuggingfaceParser (line 129) | class HuggingfaceParser(Parser): method find_sections (line 130) | def find_sections(self) -> Iterator[Section]: method build_url (line 142) | def build_url(self, suffix: str) -> str: function get_document (line 146) | def get_document( function get_all_documents (line 179) | def get_all_documents( FILE: buster/retriever/base.py class Retriever (line 19) | class Retriever(ABC): method __init__ (line 20) | def __init__( method get_documents (line 49) | def get_documents(self, source: Optional[str] = None) -> pd.DataFrame: method get_source_display_name (line 61) | def get_source_display_name(self, source: str) -> str: method get_topk_documents (line 75) | def get_topk_documents(self, query: str, source: Optional[str] = None,... method threshold_documents (line 90) | def threshold_documents(self, matched_documents: pd.DataFrame, thresh:... method retrieve (line 103) | def retrieve( FILE: buster/retriever/deeplake.py function extract_metadata (line 14) | def extract_metadata(x: pd.DataFrame, columns) -> pd.DataFrame: function data_dict_to_df (line 29) | def data_dict_to_df(data: dict) -> pd.DataFrame: function build_tql_query (line 54) | def build_tql_query(embedding, sources=None, top_k: int = 3) -> str: class DeepLakeRetriever (line 86) | class DeepLakeRetriever(Retriever): method __init__ (line 87) | def __init__( method get_documents (line 117) | def get_documents(self, sources: Optional[list[str]] = None) -> pd.Dat... method get_source_display_name (line 135) | def get_source_display_name(self, source: str) -> str: method get_topk_documents (line 149) | def get_topk_documents( FILE: buster/retriever/service.py class ServiceRetriever (line 17) | class ServiceRetriever(Retriever): method __init__ (line 18) | def __init__( method get_source_id (line 53) | def get_source_id(self, source: str) -> str: method get_documents (line 65) | def get_documents(self, source: Optional[str] = None) -> pd.DataFrame: method get_source_display_name (line 88) | def get_source_display_name(self, source: str) -> str: method get_topk_documents (line 103) | def get_topk_documents(self, query: str, sources: Optional[List[str]],... FILE: buster/tokenizers/__init__.py function tokenizer_factory (line 5) | def tokenizer_factory(tokenizer_cfg: dict) -> Tokenizer: FILE: buster/tokenizers/base.py class Tokenizer (line 5) | class Tokenizer(ABC): method __init__ (line 16) | def __init__(self, model_name: str): method encode (line 20) | def encode(self, string: str) -> list[int]: method decode (line 34) | def decode(self, encoded: list[int]) -> str: method num_tokens (line 47) | def num_tokens(self, string: str, return_encoded: bool = False) -> Uni... FILE: buster/tokenizers/gpt.py class GPTTokenizer (line 6) | class GPTTokenizer(Tokenizer): method __init__ (line 19) | def __init__(self, model_name: str): method encode (line 23) | def encode(self, string: str): method decode (line 35) | def decode(self, encoded: list[int]): FILE: buster/utils.py function get_file_extension (line 6) | def get_file_extension(filepath: str) -> str: function download_db (line 10) | def download_db(db_url: str, output_dir: str): function zip_contents (line 22) | def zip_contents(input_path, output_path): function extract_zip (line 51) | def extract_zip(zip_file_path, output_path): FILE: buster/validators/base.py class Validator (line 16) | class Validator: method __init__ (line 17) | def __init__( method check_question_relevance (line 49) | def check_question_relevance(self, question: str) -> tuple[bool, str]: method check_answer_relevance (line 61) | def check_answer_relevance(self, answer: str) -> bool: method check_documents_relevance (line 73) | def check_documents_relevance(self, answer: str, matched_documents: pd... method rerank_docs (line 86) | def rerank_docs( FILE: buster/validators/validators.py class QuestionValidator (line 16) | class QuestionValidator: method __init__ (line 17) | def __init__( method check_question_relevance (line 56) | def check_question_relevance(self, question: str) -> tuple[bool, str]: class AnswerValidator (line 74) | class AnswerValidator: method __init__ (line 75) | def __init__( method check_answer_relevance (line 96) | def check_answer_relevance(self, answer: str) -> bool: class DocumentsValidator (line 114) | class DocumentsValidator: method __init__ (line 115) | def __init__( method check_document_relevance (line 150) | def check_document_relevance(self, answer: str, document: str) -> bool: method check_documents_relevance (line 163) | def check_documents_relevance(self, answer: str, matched_documents: pd... FILE: tests/test_chatbot.py function get_fake_embedding (line 90) | def get_fake_embedding(length=1536): class MockAnswerer (line 95) | class MockAnswerer(Completer): method __init__ (line 96) | def __init__(self, expected_answer): method prepare_prompt (line 99) | def prepare_prompt(self, user_inputs, matched_documents): method complete (line 102) | def complete(self): method get_completion (line 105) | def get_completion(self, user_inputs, matched_documents, validator, *a... class MockRetriever (line 115) | class MockRetriever(Retriever): method __init__ (line 116) | def __init__(self, **kwargs): method get_documents (line 136) | def get_documents(self, source): method get_topk_documents (line 139) | def get_topk_documents(self, query: str, sources: list[str] = None, to... method get_source_display_name (line 145) | def get_source_display_name(self, source): class MockValidator (line 149) | class MockValidator: method __init__ (line 150) | def __init__(self, *args, **kwargs): method validate (line 153) | def validate(self, completion): method check_question_relevance (line 157) | def check_question_relevance(self, *args, **kwargs): method check_answer_relevance (line 160) | def check_answer_relevance(self, *args, **kwargs): function vector_store_path (line 165) | def vector_store_path(tmp_path_factory): function test_chatbot_mock_data (line 176) | def test_chatbot_mock_data(tmp_path, monkeypatch): function test_chatbot_real_data__chatGPT (line 196) | def test_chatbot_real_data__chatGPT(vector_store_path): function test_chatbot_real_data__chatGPT_OOD (line 219) | def test_chatbot_real_data__chatGPT_OOD(vector_store_path): function test_chatbot_real_data__no_docs_found (line 256) | def test_chatbot_real_data__no_docs_found(vector_store_path): FILE: tests/test_documents.py function get_fake_embedding (line 17) | def get_fake_embedding(*arg, **kwargs): function test_write_read (line 25) | def test_write_read(tmp_path, documents_manager, retriever): function test_write_write_read (line 64) | def test_write_write_read(tmp_path, documents_manager, retriever): function test_generate_embeddings (line 109) | def test_generate_embeddings(tmp_path, monkeypatch): function test_generate_embeddings_parallelized (line 137) | def test_generate_embeddings_parallelized(): function test_add_batches (line 161) | def test_add_batches(tmp_path): FILE: tests/test_formatters.py function test_DocumentsDormatterHTML__simple (line 11) | def test_DocumentsDormatterHTML__simple(): function test_DocumentsDormatterJSON__simple (line 41) | def test_DocumentsDormatterJSON__simple(): function test_DocumentsFormatterHTML__doc_to_long (line 78) | def test_DocumentsFormatterHTML__doc_to_long(): function test_DocumentsFormatterJSON__doc_too_long (line 109) | def test_DocumentsFormatterJSON__doc_too_long(): function test_DocumentsFormatterHTML__doc_to_long_2 (line 156) | def test_DocumentsFormatterHTML__doc_to_long_2(): function test_DocumentsFormatterHTML__complex_format (line 185) | def test_DocumentsFormatterHTML__complex_format(): function test_system_prompt_formatter (line 230) | def test_system_prompt_formatter(): function test_system_prompt_formatter__to_long (line 249) | def test_system_prompt_formatter__to_long(): FILE: tests/test_read_write.py class MockValidator (line 6) | class MockValidator: method __init__ (line 7) | def __init__(self): method check_answer_relevance (line 10) | def check_answer_relevance(self, completion: Completion) -> bool: method rerank_docs (line 13) | def rerank_docs(self, answer: str, matched_documents: pd.DataFrame) ->... function test_read_write_completion (line 17) | def test_read_write_completion(): FILE: tests/test_validator.py function test_validator_check_question_relevance (line 28) | def test_validator_check_question_relevance(): function test_validator_check_answer_relevance (line 38) | def test_validator_check_answer_relevance(): function test_validator_check_documents_relevance (line 46) | def test_validator_check_documents_relevance(): function test_validator_rerank_docs (line 65) | def test_validator_rerank_docs():