SYMBOL INDEX (1755 symbols across 130 files)

FILE: abacus-research/biodex-ablation.py
  class BiodexValidator (line 32) | class BiodexValidator(pz.Validator):
    method __init__ (line 33) | def __init__(
    method _compute_pmid_to_label (line 55) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 68) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 93) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 123) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 136) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 146) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 147) | def __init__(
    method __len__ (line 169) | def __len__(self):
    method __getitem__ (line 172) | def __getitem__(self, idx: int):
  function search_func (line 291) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 393) | def compute_target_record(entry):
  function rank_precision_at_k (line 405) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 422) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-demo.py
  class BiodexValidator (line 32) | class BiodexValidator(pz.Validator):
    method __init__ (line 33) | def __init__(
    method _compute_pmid_to_label (line 55) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 68) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 93) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 123) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 136) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 146) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 147) | def __init__(
    method __len__ (line 169) | def __len__(self):
    method __getitem__ (line 172) | def __getitem__(self, idx: int):
  function search_func (line 332) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 435) | def compute_target_record(entry):
  function rank_precision_at_k (line 447) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 464) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-max-quality-at-cost.py
  class BiodexValidator (line 34) | class BiodexValidator(pz.Validator):
    method __init__ (line 35) | def __init__(
    method _compute_pmid_to_label (line 57) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 70) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 95) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 125) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 138) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 148) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 149) | def __init__(
    method __len__ (line 171) | def __len__(self):
    method __getitem__ (line 174) | def __getitem__(self, idx: int):
  function search_func (line 321) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 423) | def compute_target_record(entry):
  function rank_precision_at_k (line 435) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 452) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-min-at-fixed-quality.py
  class BiodexValidator (line 33) | class BiodexValidator(pz.Validator):
    method __init__ (line 34) | def __init__(
    method _compute_pmid_to_label (line 56) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 69) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 94) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 124) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 137) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 147) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 148) | def __init__(
    method __len__ (line 170) | def __len__(self):
    method __getitem__ (line 173) | def __getitem__(self, idx: int):
  function search_func (line 318) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 415) | def compute_target_record(entry):
  function rank_precision_at_k (line 427) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 444) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-pareto-cascades.py
  class BiodexValidator (line 33) | class BiodexValidator(pz.Validator):
    method __init__ (line 34) | def __init__(
    method _compute_pmid_to_label (line 56) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 69) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 94) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 124) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 137) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 147) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 148) | def __init__(
    method __len__ (line 170) | def __len__(self):
    method __getitem__ (line 173) | def __getitem__(self, idx: int):
  function search_func (line 321) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 420) | def compute_target_record(entry):
  function rank_precision_at_k (line 432) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 449) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/cuad-demo.py
  function get_label_df (line 267) | def get_label_df(num_contracts: int = 1, seed: int=42) -> pd.DataFrame:
  function get_jaccard (line 325) | def get_jaccard(label, pred):
  function evaluate_entry (line 347) | def evaluate_entry(labels, preds, substr_ok):
  function handle_empty_preds (line 401) | def handle_empty_preds(preds):
  class CUADValidator (line 413) | class CUADValidator(pz.Validator):
    method __init__ (line 414) | def __init__(self, num_contracts: int = 1, seed: int=42):
    method map_score_fn (line 425) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method _compute_contract_id_to_labels (line 440) | def _compute_contract_id_to_labels(self):
  class CUADDataset (line 493) | class CUADDataset(pz.IterDataset):
    method __init__ (line 494) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:...
    method _construct_dataset (line 511) | def _construct_dataset(self, dataset, num_contracts, seed: int=42):
    method __len__ (line 544) | def __len__(self):
    method __getitem__ (line 547) | def __getitem__(self, idx: int):
  function compute_precision_recall (line 553) | def compute_precision_recall(label_df, preds_df):
  function parse_arguments (line 589) | def parse_arguments():
  function build_cuad_query (line 665) | def build_cuad_query(dataset, mode):
  function main (line 691) | def main():

FILE: abacus-research/cuad-max-quality-at-cost.py
  function get_label_df (line 268) | def get_label_df(num_contracts: int = 1, seed: int=42) -> pd.DataFrame:
  function get_jaccard (line 325) | def get_jaccard(label, pred):
  function evaluate_entry (line 347) | def evaluate_entry(labels, preds, substr_ok):
  function handle_empty_preds (line 401) | def handle_empty_preds(preds):
  class CUADValidator (line 413) | class CUADValidator(pz.Validator):
    method __init__ (line 414) | def __init__(self, num_contracts: int = 1, seed: int=42):
    method map_score_fn (line 425) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method _compute_contract_id_to_labels (line 439) | def _compute_contract_id_to_labels(self):
  class CUADDataset (line 492) | class CUADDataset(pz.IterDataset):
    method __init__ (line 493) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:...
    method _construct_dataset (line 509) | def _construct_dataset(self, dataset, num_contracts, seed: int=42):
    method __len__ (line 542) | def __len__(self):
    method __getitem__ (line 545) | def __getitem__(self, idx: int):
  function compute_precision_recall (line 551) | def compute_precision_recall(label_df, preds_df):
  function parse_arguments (line 587) | def parse_arguments():
  function build_cuad_query (line 650) | def build_cuad_query(dataset, mode):
  function main (line 677) | def main():

FILE: abacus-research/cuad_data_loader.py
  function load_cuad_data (line 14) | def load_cuad_data(split="test", data_dir=None):
  function get_unique_contracts (line 60) | def get_unique_contracts(dataset):
  function filter_by_contracts (line 69) | def filter_by_contracts(dataset, contract_titles):
  function sample_contracts (line 74) | def sample_contracts(dataset, num_contracts, seed=42):

FILE: abacus-research/helper-scripts/generate-prior-stats-biodex-first-convert.py
  class BiodexDataset (line 26) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 27) | def __init__(
    method compute_label (line 49) | def compute_label(self, entry: dict) -> dict:
    method term_recall (line 59) | def term_recall(preds: list | None, targets: list):
    method __len__ (line 89) | def __len__(self):
    method __getitem__ (line 92) | def __getitem__(self, idx: int):

FILE: abacus-research/helper-scripts/generate-prior-stats-biodex.py
  class BiodexDataset (line 35) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 36) | def __init__(
    method compute_label (line 58) | def compute_label(self, entry: dict) -> dict:
    method rank_precision_at_k (line 71) | def rank_precision_at_k(preds: list | None, targets: list, k: int):
    method term_recall (line 97) | def term_recall(preds: list | None, targets: list):
    method __len__ (line 127) | def __len__(self):
    method __getitem__ (line 130) | def __getitem__(self, idx: int):
  function search_func (line 202) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...

FILE: abacus-research/helper-scripts/generate-prior-stats-cuad.py
  function get_jaccard (line 273) | def get_jaccard(label, pred):
  function evaluate_entry (line 295) | def evaluate_entry(labels, preds, substr_ok):
  function handle_empty_preds (line 350) | def handle_empty_preds(preds):
  function compute_precision_recall (line 365) | def compute_precision_recall(label_df, preds_df):
  class CUADDataset (line 400) | class CUADDataset(pz.IterDataset):
    method __init__ (line 401) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:...
    method _construct_dataset (line 419) | def _construct_dataset(self, dataset, num_contracts, seed: int=42, inc...
    method __len__ (line 484) | def __len__(self):
    method __getitem__ (line 487) | def __getitem__(self, idx: int):
    method get_label_df (line 490) | def get_label_df(self):
  function parse_arguments (line 505) | def parse_arguments():
  function build_cuad_query (line 513) | def build_cuad_query(dataset, mode):
  function main (line 540) | def main():

FILE: abacus-research/helper-scripts/mmqa-baseline.py
  function f1 (line 14) | def f1(preds: list | None, targets: list):

FILE: abacus-research/mmqa-complex-demo.py
  function get_json_from_answer (line 57) | def get_json_from_answer(answer: str):
  class MMQAValidator (line 93) | class MMQAValidator(pz.Validator):
    method __init__ (line 94) | def __init__(self, dataset: list[dict]):
    method _compute_qid_to_labels (line 101) | def _compute_qid_to_labels(self) -> dict:
    method recall (line 121) | def recall(self, preds: list | None, targets: list):
    method f1 (line 151) | def f1(self, preds: list | None, targets: list):
    method map_score_fn (line 189) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method join_score_fn (line 196) | def join_score_fn(self, condition: str, left_input_record: dict, right...
  class MMQAQuestionDataset (line 213) | class MMQAQuestionDataset(pz.IterDataset):
    method __init__ (line 214) | def __init__(self, dataset: list[dict]):
    method __len__ (line 218) | def __len__(self):
    method __getitem__ (line 221) | def __getitem__(self, idx: int):
  class MMQATextDataset (line 225) | class MMQATextDataset(pz.IterDataset):
    method __init__ (line 226) | def __init__(self, dataset: list[dict]):
    method __len__ (line 245) | def __len__(self):
    method __getitem__ (line 248) | def __getitem__(self, idx: int):
  class MMQATableDataset (line 252) | class MMQATableDataset(pz.IterDataset):
    method __init__ (line 253) | def __init__(self, dataset: list[dict]):
    method __len__ (line 296) | def __len__(self):
    method __getitem__ (line 299) | def __getitem__(self, idx: int):
  class MMQAImageDataset (line 303) | class MMQAImageDataset(pz.IterDataset):
    method __init__ (line 304) | def __init__(self, dataset: list[dict]):
    method __len__ (line 341) | def __len__(self):
    method __getitem__ (line 344) | def __getitem__(self, idx: int):
  function get_dataset (line 348) | def get_dataset(split: str, shuffle: bool, seed: int, num_samples: int |...
  function compute_f1 (line 364) | def compute_f1(final_df, answers_df):

FILE: abacus-research/mmqa-demo.py
  function get_json_from_answer (line 46) | def get_json_from_answer(answer: str):
  class MMQAValidator (line 82) | class MMQAValidator(pz.Validator):
    method __init__ (line 83) | def __init__(
    method _compute_qid_to_labels (line 113) | def _compute_qid_to_labels(self) -> dict:
    method recall (line 139) | def recall(self, preds: list | None, targets: list):
    method f1 (line 172) | def f1(self, preds: list | None, targets: list):
    method map_score_fn (line 213) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 218) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class MMQADataset (line 235) | class MMQADataset(pz.IterDataset):
    method __init__ (line 236) | def __init__(
    method __len__ (line 265) | def __len__(self):
    method __getitem__ (line 268) | def __getitem__(self, idx: int):
  function compute_f1 (line 282) | def compute_f1(final_df, answers_df):
  function get_results_and_ids (line 456) | def get_results_and_ids(index: chromadb.Collection, query: list[list[flo...
  function text_search_func (line 482) | def text_search_func(index: chromadb.Collection, query: list[list[float]...
  function table_search_func (line 487) | def table_search_func(index: chromadb.Collection, query: list[list[float...
  function image_search_func (line 492) | def image_search_func(index: chromadb.Collection, query: list[list[float...

FILE: abacus-research/score_biodex.py
  function compute_final_metrics (line 6) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/score_cuad.py
  function compute_final_metrics (line 7) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/score_mmqa.py
  function compute_final_metrics (line 6) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/score_mmqa_complex.py
  function compute_final_metrics (line 7) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/setup_cuad_data.py
  function setup_cuad_data (line 12) | def setup_cuad_data():

FILE: demos/audio-demo.py
  class SmallAudioDataset (line 8) | class SmallAudioDataset(pz.AudioFileDataset):
    method __init__ (line 9) | def __init__(self, *args, **kwargs):

FILE: demos/caching-demo.py
  class TravelRequestDataset (line 169) | class TravelRequestDataset(pz.IterDataset):
    method __init__ (line 172) | def __init__(self, requests: List[str]):
    method __len__ (line 176) | def __len__(self):
    method __getitem__ (line 179) | def __getitem__(self, idx: int):
  function get_model_from_string (line 198) | def get_model_from_string(model_str: str) -> Model:
  function print_cache_stats (line 207) | def print_cache_stats(execution_stats):
  function main (line 247) | def main():

FILE: demos/demo_core.py
  function build_sci_paper_plan (line 30) | def build_sci_paper_plan(dataset):
  function build_test_pdf_plan (line 34) | def build_test_pdf_plan(dataset):
  function build_mit_battery_paper_plan (line 38) | def build_mit_battery_paper_plan(dataset):
  function build_enron_plan (line 45) | def build_enron_plan(dataset):
  function compute_enron_stats (line 49) | def compute_enron_stats(dataset):
  function enron_gby_plan (line 55) | def enron_gby_plan(dataset):
  function enron_count_plan (line 65) | def enron_count_plan(dataset):
  function enron_average_count_plan (line 75) | def enron_average_count_plan(dataset):
  function enron_limit_plan (line 90) | def enron_limit_plan(dataset, limit=5):
  function build_image_plan (line 96) | def build_image_plan(dataset):
  function build_image_agg_plan (line 103) | def build_image_agg_plan(dataset):
  function build_join_plan (line 115) | def build_join_plan(dataset1, dataset2):
  function build_join_image_plan (line 122) | def build_join_image_plan(dataset1, dataset2):
  function get_task_config (line 129) | def get_task_config(task, dataset, join_dataset=None):
  function execute_task (line 188) | def execute_task(task, dataset, policy, join_dataset=None, verbose=False...
  function format_results_table (line 206) | def format_results_table(records: list[DataRecord], cols=None):

FILE: demos/enron-demo.py
  class EnronValidator (line 8) | class EnronValidator(pz.Validator):
    method __init__ (line 9) | def __init__(self, labels_file: str):
    method map_score_fn (line 17) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
  class EnronDataset (line 27) | class EnronDataset(pz.IterDataset):
    method __init__ (line 28) | def __init__(self, dir: str, labels_file: str | None = None, split: st...
    method __len__ (line 37) | def __len__(self):
    method __getitem__ (line 40) | def __getitem__(self, idx: int):

FILE: demos/image-demo.py
  function build_image_plan (line 22) | def build_image_plan(dataset):

FILE: demos/join-demo.py
  function run_text_join (line 16) | def run_text_join():
  function run_image_join (line 30) | def run_image_join():
  function run_text_image_join (line 44) | def run_text_image_join():

FILE: demos/paper-demo.py
  function print_table (line 15) | def print_table(records, cols=None, plan_str=None):
  function within_two_miles_of_mit (line 50) | def within_two_miles_of_mit(record: dict):
  function in_price_range (line 59) | def in_price_range(record: dict):
  class RealEstateListingDataset (line 118) | class RealEstateListingDataset(pz.IterDataset):
    method __init__ (line 119) | def __init__(self, listings_dir):
    method __len__ (line 125) | def __len__(self):
    method __getitem__ (line 128) | def __getitem__(self, idx: int):

FILE: demos/real-estate-demo.py
  function print_table (line 13) | def print_table(records, cols=None, plan_str=None):
  function within_two_miles_of_mit (line 48) | def within_two_miles_of_mit(record: dict):
  function in_price_range (line 57) | def in_price_range(record: dict):
  class RealEstateListingDataset (line 113) | class RealEstateListingDataset(pz.IterDataset):
    method __init__ (line 114) | def __init__(self, listings_dir):
    method __len__ (line 120) | def __len__(self):
    method __getitem__ (line 123) | def __getitem__(self, idx: int):

FILE: demos/simple-demo.py
  function main (line 13) | def main():

FILE: demos/vllm-demo.py
  class SentimentResult (line 21) | class SentimentResult(BaseModel):
  function main (line 25) | def main():

FILE: evals/quest/eval.py
  function prepare_docs_for_query (line 11) | def prepare_docs_for_query(items: list, gt_docs: list) -> list:
  function palimpzest_run_query (line 22) | def palimpzest_run_query(query: dict, documents: list) -> list[str]:
  function main (line 56) | def main():

FILE: scripts/capture_litellm_stats.py
  class RawProviderStatsCapture (line 48) | class RawProviderStatsCapture(CustomLogger):
    method __init__ (line 57) | def __init__(self):
    method log_success_event (line 62) | def log_success_event(self, kwargs, response_obj, start_time, end_time):
    method log_failure_event (line 94) | def log_failure_event(self, kwargs, response_obj, start_time, end_time):
    method reset (line 100) | def reset(self):
    method get_captured_data (line 106) | def get_captured_data(self) -> dict[str, Any]:
  function load_messages (line 179) | def load_messages(modality: str, provider: str, messages_dir: str) -> li...
  function transform_messages_for_litellm (line 186) | def transform_messages_for_litellm(messages: list[dict]) -> list[dict]:
  function call_litellm_api (line 267) | def call_litellm_api(
  function capture_stats_for_provider (line 382) | def capture_stats_for_provider(
  function save_stats (line 425) | def save_stats(stats: dict[str, Any], output_dir: str, provider: str, mo...
  function main (line 436) | def main():

FILE: scripts/capture_provider_stats.py
  function detect_image_media_type (line 35) | def detect_image_media_type(base64_data: str) -> str:
  function load_messages (line 119) | def load_messages(modality: str, provider: str, messages_dir: str) -> li...
  function transform_messages_for_openai (line 126) | def transform_messages_for_openai(messages: list[dict]) -> list[dict]:
  function transform_messages_for_anthropic (line 228) | def transform_messages_for_anthropic(messages: list[dict]) -> tuple[str ...
  function transform_messages_for_gemini (line 311) | def transform_messages_for_gemini(messages: list[dict]) -> tuple[str | N...
  function call_openai_api (line 386) | def call_openai_api(messages: list[dict], model: str, cache_key: str | N...
  function call_azure_api (line 446) | def call_azure_api(messages: list[dict], model: str, cache_key: str | No...
  function call_anthropic_api (line 516) | def call_anthropic_api(messages: list[dict], model: str) -> dict[str, Any]:
  function call_gemini_api (line 565) | def call_gemini_api(messages: list[dict], model: str, use_vertex: bool =...
  function capture_stats_for_provider (line 668) | def capture_stats_for_provider(
  function save_stats (line 726) | def save_stats(stats: dict[str, Any], output_dir: str, provider: str, mo...
  function main (line 737) | def main():

FILE: scripts/generate_test_messages.py
  function generate_session_id (line 36) | def generate_session_id(provider: str, modality: str) -> str:
  class TextInputSchema (line 129) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 135) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 141) | class AudioInputSchema(BaseModel):
  class OutputSchema (line 154) | class OutputSchema(BaseModel):
  function save_messages (line 255) | def save_messages(modality: str, provider: str, messages: list[dict], ou...
  function main (line 283) | def main():

FILE: scripts/update_model_info.py
  function get_free_port (line 115) | def get_free_port() -> int:
  function extract_provider (line 121) | def extract_provider(model_id: str) -> str:
  function get_api_key_env_var (line 156) | def get_api_key_env_var(provider: str) -> str | None:
  function generate_config_yaml (line 160) | def generate_config_yaml(model_ids: list[str]) -> str:
  function fetch_dynamic_model_info (line 188) | def fetch_dynamic_model_info(model_ids: list[str]) -> dict[str, Any]:
  function fetch_litellm_data (line 262) | def fetch_litellm_data() -> dict[str, Any]:
  function load_existing_data (line 275) | def load_existing_data() -> dict[str, Any]:
  function save_data (line 282) | def save_data(data: dict[str, Any]) -> None:
  function derive_model_flags_with_provider (line 295) | def derive_model_flags_with_provider(model_id: str, provider: str) -> di...
  function prompt_for_value (line 307) | def prompt_for_value(field_name: str, current_value: Any, value_type: st...
  function review_field (line 331) | def review_field(
  function convert_and_review_model (line 368) | def convert_and_review_model(
  function update_model (line 539) | def update_model(
  function process_models (line 572) | def process_models(
  function main (line 625) | def main():

FILE: src/palimpzest/agents/search_agents.py
  class PZBaseAgent (line 71) | class PZBaseAgent(CodeAgent):
    method __init__ (line 72) | def __init__(self, run_id: str, context_description: str, *args, **kwa...
    method write_memory_to_messages (line 87) | def write_memory_to_messages(
    method _generate_planning_step (line 101) | def _generate_planning_step(
    method _step_stream (line 243) | def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessa...
    method _run_stream (line 378) | def _run_stream(
    method run (line 445) | def run(
  class PZBaseManagedAgent (line 545) | class PZBaseManagedAgent(PZBaseAgent):
    method __call__ (line 547) | def __call__(self, task: str, **kwargs):
  class DataDiscoveryAgent (line 569) | class DataDiscoveryAgent(PZBaseManagedAgent):
    method __init__ (line 570) | def __init__(self, run_id: str, context_description: str, *args, **kwa...
  class SearchManagerAgent (line 604) | class SearchManagerAgent(PZBaseAgent):
    method __init__ (line 605) | def __init__(self, run_id: str, context_description: str, *args, **kwa...

FILE: src/palimpzest/constants.py
  class PromptStrategy (line 12) | class PromptStrategy(str, Enum):
    method is_agg_prompt (line 46) | def is_agg_prompt(self):
    method is_filter_prompt (line 49) | def is_filter_prompt(self):
    method is_join_prompt (line 52) | def is_join_prompt(self):
    method is_map_prompt (line 55) | def is_map_prompt(self):
    method is_critic_prompt (line 58) | def is_critic_prompt(self):
    method is_refine_prompt (line 61) | def is_refine_prompt(self):
    method is_moa_proposer_prompt (line 64) | def is_moa_proposer_prompt(self):
    method is_moa_aggregator_prompt (line 67) | def is_moa_aggregator_prompt(self):
    method is_split_proposer_prompt (line 70) | def is_split_proposer_prompt(self):
    method is_split_merger_prompt (line 73) | def is_split_merger_prompt(self):
    method is_no_reasoning_prompt (line 76) | def is_no_reasoning_prompt(self):
  class Modality (line 80) | class Modality(str, Enum):
  class AggFunc (line 86) | class AggFunc(str, Enum):
  class Cardinality (line 93) | class Cardinality(str, Enum):
    method _missing_ (line 98) | def _missing_(cls, value):
  class PickOutputStrategy (line 108) | class PickOutputStrategy(str, Enum):
  function log_attempt_number (line 135) | def log_attempt_number(retry_state):
  class Model (line 191) | class Model:
    method __init__ (line 199) | def __init__(self, model_id: str, api_base: str | None = None, **vllm_...
    method _get_litellm_model_specs (line 215) | def _get_litellm_model_specs(self, model_id: str) -> dict:
    method __lt__ (line 257) | def __lt__(self, other):
    method get_all_models (line 265) | def get_all_models(cls) -> list[Model]:
    method value (line 269) | def value(self) -> str:
    method provider (line 273) | def provider(self) -> str | None:
    method api_key_env_var (line 278) | def api_key_env_var(self) -> str | None:
    method __repr__ (line 295) | def __repr__(self) -> str:
    method __str__ (line 298) | def __str__(self) -> str:
    method __eq__ (line 301) | def __eq__(self, other: object) -> bool:
    method __hash__ (line 308) | def __hash__(self) -> int:
    method is_llama_model (line 311) | def is_llama_model(self) -> bool:
    method is_vllm_model (line 314) | def is_vllm_model(self) -> bool:
    method is_embedding_model (line 317) | def is_embedding_model(self) -> bool:
    method is_text_image_multimodal_embedding_model (line 320) | def is_text_image_multimodal_embedding_model(self) -> bool:
    method is_provider_vertex_ai (line 323) | def is_provider_vertex_ai(self) -> bool:
    method is_provider_anthropic (line 326) | def is_provider_anthropic(self) -> bool:
    method is_provider_google_ai_studio (line 329) | def is_provider_google_ai_studio(self) -> bool:
    method is_provider_openai (line 332) | def is_provider_openai(self) -> bool:
    method is_provider_azure (line 335) | def is_provider_azure(self) -> bool:
    method is_provider_together_ai (line 338) | def is_provider_together_ai(self) -> bool:
    method is_provider_deepseek (line 341) | def is_provider_deepseek(self) -> bool:
    method is_provider_ollama (line 344) | def is_provider_ollama(self) -> bool:
    method is_model_gemini (line 347) | def is_model_gemini(self) -> bool:
    method get_model_name (line 350) | def get_model_name(self) -> str:
    method is_o_model (line 353) | def is_o_model(self) -> bool:
    method is_gpt_5_model (line 356) | def is_gpt_5_model(self) -> bool:
    method is_reasoning_model (line 359) | def is_reasoning_model(self) -> bool:
    method is_text_model (line 362) | def is_text_model(self) -> bool:
    method is_vision_model (line 365) | def is_vision_model(self) -> bool:
    method is_audio_model (line 368) | def is_audio_model(self) -> bool:
    method is_text_image_multimodal_model (line 371) | def is_text_image_multimodal_model(self) -> bool:
    method is_text_audio_multimodal_model (line 374) | def is_text_audio_multimodal_model(self) -> bool:
    method supports_prompt_caching (line 377) | def supports_prompt_caching(self) -> bool:
    method get_usd_per_input_token (line 381) | def get_usd_per_input_token(self) -> float:
    method get_usd_per_audio_input_token (line 384) | def get_usd_per_audio_input_token(self) -> float:
    method get_usd_per_image_input_token (line 388) | def get_usd_per_image_input_token(self) -> float:
    method get_usd_per_cache_read_token (line 391) | def get_usd_per_cache_read_token(self) -> float:
    method get_usd_per_audio_cache_read_token (line 394) | def get_usd_per_audio_cache_read_token(self) -> float:
    method get_usd_per_image_cache_read_token (line 397) | def get_usd_per_image_cache_read_token(self) -> float:
    method get_usd_per_cached_token_per_hour (line 401) | def get_usd_per_cached_token_per_hour(self) -> float:
    method get_usd_per_cache_creation_token (line 404) | def get_usd_per_cache_creation_token(self) -> float:
    method get_usd_per_output_token (line 407) | def get_usd_per_output_token(self) -> float:
    method get_usd_per_audio_cache_creation_token (line 411) | def get_usd_per_audio_cache_creation_token(self) -> float:
    method get_usd_per_image_cache_creation_token (line 415) | def get_usd_per_image_cache_creation_token(self) -> float:
    method get_seconds_per_output_token (line 418) | def get_seconds_per_output_token(self) -> float:
    method get_overall_score (line 421) | def get_overall_score(self) -> float:

FILE: src/palimpzest/core/data/context.py
  class Context (line 120) | class Context(Dataset, ABC):
    method __init__ (line 135) | def __init__(
    method description (line 175) | def description(self) -> str:
    method materialized (line 180) | def materialized(self) -> bool:
    method tools (line 185) | def tools(self) -> list[Callable]:
    method __str__ (line 189) | def __str__(self) -> str:
    method set_description (line 192) | def set_description(self, description: str) -> None:
    method set_materialized (line 198) | def set_materialized(self, materialized: str) -> None:
    method compute (line 204) | def compute(self, instruction: str) -> Context:
    method search (line 221) | def search(self, search_query: str) -> Context:
  class TextFileContext (line 236) | class TextFileContext(Context):
    method __init__ (line 237) | def __init__(self, path: str, id: str, description: str) -> None:
    method _check_filter_answer_text (line 271) | def _check_filter_answer_text(self, answer_text: str) -> dict | None:
    method _parse_filter_answer (line 288) | def _parse_filter_answer(self, completion_text: str) -> dict[str, list]:
    method tool_execute_semantic_operators (line 347) | def tool_execute_semantic_operators(self, instruction: str) -> str:

FILE: src/palimpzest/core/data/context_manager.py
  class ContextNotFoundError (line 14) | class ContextNotFoundError(Exception):
  class ContextManager (line 18) | class ContextManager:
    method __init__ (line 24) | def __init__(self):
    method from_pkl (line 45) | def from_pkl(path: str) -> context.Context:
    method to_pkl (line 53) | def to_pkl(context: context.Context, path: str) -> None:
    method num_tokens_from_string (line 58) | def num_tokens_from_string(self, string: str, encoding_name: str) -> int:
    method add_context (line 64) | def add_context(self, context: context.Context, update: bool = False) ...
    method update_context (line 101) | def update_context(self, id: str, description: str, materialized: bool...
    method get_context (line 119) | def get_context(self, id: str) -> context.Context:
    method search_context (line 135) | def search_context(self, query: str, k: int = 1, where: dict | None = ...

FILE: src/palimpzest/core/data/dataset.py
  class Dataset (line 36) | class Dataset:
    method __init__ (line 66) | def __init__(
    method id (line 105) | def id(self) -> str:
    method schema (line 110) | def schema(self) -> type[BaseModel]:
    method is_root (line 115) | def is_root(self) -> bool:
    method __str__ (line 118) | def __str__(self) -> str:
    method __iter__ (line 121) | def __iter__(self) -> Iterator[Dataset]:
    method _compute_dataset_id (line 126) | def _compute_dataset_id(self) -> str:
    method _set_root_datasets (line 136) | def _set_root_datasets(self, new_root_datasets: dict[str, Dataset]) ->...
    method _generate_unique_logical_op_ids (line 154) | def _generate_unique_logical_op_ids(self, topo_idx: int | None = None)...
    method _resolve_depends_on (line 178) | def _resolve_depends_on(self, depends_on: list[str]) -> list[str]:
    method _get_root_datasets (line 184) | def _get_root_datasets(self) -> dict[str, Dataset]:
    method relax_types (line 196) | def relax_types(self) -> None:
    method get_upstream_datasets (line 211) | def get_upstream_datasets(self) -> list[Dataset]:
    method get_limit (line 222) | def get_limit(self) -> int | None:
    method copy (line 238) | def copy(self):
    method join (line 246) | def join(self, other: Dataset, on: str | list[str], how: str = "inner"...
    method sem_join (line 269) | def sem_join(self, other: Dataset, condition: str, desc: str | None = ...
    method filter (line 292) | def filter(
    method sem_filter (line 317) | def sem_filter(
    method _sem_map (line 340) | def _sem_map(self, cols: list[dict] | type[BaseModel] | None,
    method sem_add_columns (line 373) | def sem_add_columns(self, cols: list[dict] | type[BaseModel],
    method sem_map (line 402) | def sem_map(self, cols: list[dict] | type[BaseModel], desc: str | None...
    method sem_flat_map (line 416) | def sem_flat_map(self, cols: list[dict] | type[BaseModel], desc: str |...
    method _map (line 432) | def _map(self, udf: Callable,
    method add_columns (line 464) | def add_columns(self, udf: Callable,
    method map (line 502) | def map(self, udf: Callable,
    method flat_map (line 526) | def flat_map(self, udf: Callable,
    method count (line 550) | def count(self) -> Dataset:
    method average (line 555) | def average(self) -> Dataset:
    method sum (line 560) | def sum(self) -> Dataset:
    method min (line 565) | def min(self) -> Dataset:
    method max (line 570) | def max(self) -> Dataset:
    method groupby (line 575) | def groupby(self, groupby: GroupBySig) -> Dataset:
    method sem_agg (line 580) | def sem_agg(self, col: dict | type[BaseModel], agg: str, depends_on: s...
    method sem_topk (line 611) | def sem_topk(
    method limit (line 650) | def limit(self, n: int) -> Dataset:
    method distinct (line 655) | def distinct(self, distinct_cols: list[str] | None = None) -> Dataset:
    method project (line 660) | def project(self, project_cols: list[str] | str) -> Dataset:
    method run (line 667) | def run(self, config: QueryProcessorConfig | None = None, **kwargs):
    method optimize_and_run (line 682) | def optimize_and_run(self, config: QueryProcessorConfig | None = None,...

FILE: src/palimpzest/core/data/index_dataset.py
  function index_factory (line 8) | def index_factory(index: Collection) -> PZIndex:
  class BaseIndex (line 24) | class BaseIndex(ABC):
    method __init__ (line 26) | def __init__(self, index: Collection):
    method __str__ (line 29) | def __str__(self):
    method search (line 36) | def search(self, query_embedding: list[float] | list[list[float]], res...
  class ChromaIndex (line 53) | class ChromaIndex(BaseIndex):
    method __init__ (line 54) | def __init__(self, index: Collection):

FILE: src/palimpzest/core/data/iter_dataset.py
  class IterDataset (line 33) | class IterDataset(dataset.Dataset, ABC):
    method __init__ (line 42) | def __init__(self, id: str, schema: type[BaseModel] | list[dict]) -> N...
    method __len__ (line 55) | def __len__(self) -> int:
    method __getitem__ (line 60) | def __getitem__(self, idx: int) -> dict:
  class BaseFileDataset (line 79) | class BaseFileDataset(IterDataset):
    method __init__ (line 85) | def __init__(self, path: str, **kwargs) -> None:
    method __len__ (line 110) | def __len__(self) -> int:
  class BaseFileDirectoryDataset (line 114) | class BaseFileDirectoryDataset(IterDataset):
    method __init__ (line 120) | def __init__(self, path: str, **kwargs) -> None:
    method __len__ (line 146) | def __len__(self) -> int:
  class MemoryDataset (line 152) | class MemoryDataset(IterDataset):
    method __init__ (line 162) | def __init__(self, id: str, vals: list | pd.DataFrame, schema: type[Ba...
    method __len__ (line 177) | def __len__(self) -> int:
    method __getitem__ (line 180) | def __getitem__(self, idx: int) -> dict:
  class HTMLFileDataset (line 216) | class HTMLFileDataset(BaseFileDataset):
    method __init__ (line 221) | def __init__(self, id: str, path: str) -> None:
    method _html_to_text_with_links (line 232) | def _html_to_text_with_links(self, html: str) -> str:
    method __getitem__ (line 248) | def __getitem__(self, idx: int) -> dict:
  class ImageFileDataset (line 284) | class ImageFileDataset(BaseFileDataset):
    method __init__ (line 289) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 300) | def __getitem__(self, idx: int) -> dict:
  class PDFFileDataset (line 326) | class PDFFileDataset(BaseFileDataset):
    method __init__ (line 333) | def __init__(
    method __getitem__ (line 354) | def __getitem__(self, idx: int) -> dict:
  class TextFileDataset (line 385) | class TextFileDataset(BaseFileDataset):
    method __init__ (line 390) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 400) | def __getitem__(self, idx: int) -> dict:
  class XLSFileDataset (line 425) | class XLSFileDataset(BaseFileDataset):
    method __init__ (line 430) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 437) | def __getitem__(self, idx: int) -> dict:
  class AudioFileDataset (line 472) | class AudioFileDataset(BaseFileDirectoryDataset):
    method __init__ (line 477) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 488) | def __getitem__(self, idx: int) -> dict:
  function get_local_source (line 514) | def get_local_source(id: str, path: str | Path, **kwargs) -> dataset.Dat...
  function resolve_datasource (line 542) | def resolve_datasource(id: str, source: str | Path | list | pd.DataFrame...

FILE: src/palimpzest/core/elements/filters.py
  class Filter (line 11) | class Filter:
    method __init__ (line 14) | def __init__(self, filter_condition: str | None = None, filter_fn: Cal...
    method serialize (line 18) | def serialize(self) -> dict[str, Any]:
    method get_filter_str (line 24) | def get_filter_str(self) -> str:
    method __repr__ (line 27) | def __repr__(self) -> str:
    method __hash__ (line 30) | def __hash__(self) -> int:
    method __eq__ (line 34) | def __eq__(self, other) -> bool:
    method __str__ (line 42) | def __str__(self) -> str:

FILE: src/palimpzest/core/elements/groupbysig.py
  class GroupBySig (line 21) | class GroupBySig:
    method __init__ (line 22) | def __init__(self, group_by_fields: list[str], agg_funcs: list[str], a...
    method validate_schema (line 27) | def validate_schema(self, input_schema: type[BaseModel]) -> tuple[bool...
    method serialize (line 36) | def serialize(self) -> dict[str, Any]:
    method __str__ (line 44) | def __str__(self) -> str:
    method __hash__ (line 47) | def __hash__(self) -> int:
    method __eq__ (line 51) | def __eq__(self, other) -> bool:
    method get_agg_field_names (line 55) | def get_agg_field_names(self) -> list[str]:
    method output_schema (line 62) | def output_schema(self) -> type[BaseModel]:

FILE: src/palimpzest/core/elements/records.py
  class DataRecord (line 28) | class DataRecord:
    method __init__ (line 31) | def __init__(
    method __setattr__ (line 89) | def __setattr__(self, name: str, value: Any, /) -> None:
    method __getattr__ (line 96) | def __getattr__(self, name: str) -> Any:
    method __getitem__ (line 100) | def __getitem__(self, field: str) -> Any:
    method __setitem__ (line 104) | def __setitem__(self, field: str, value: Any) -> None:
    method __str__ (line 108) | def __str__(self, truncate: int | None = 15) -> str:
    method __repr__ (line 116) | def __repr__(self) -> str:
    method __eq__ (line 120) | def __eq__(self, other):
    method __hash__ (line 124) | def __hash__(self):
    method __iter__ (line 128) | def __iter__(self):
    method get_field_names (line 132) | def get_field_names(self):
    method get_field_type (line 136) | def get_field_type(self, field_name: str) -> FieldInfo:
    method schema (line 140) | def schema(self) -> type[BaseModel]:
    method copy (line 143) | def copy(self) -> DataRecord:
    method from_parent (line 164) | def from_parent(
    method from_agg_parents (line 208) | def from_agg_parents(
    method from_join_parents (line 229) | def from_join_parents(
    method to_df (line 278) | def to_df(records: list[DataRecord], project_cols: list[str] | None = ...
    method to_json_str (line 297) | def to_json_str(self, include_bytes: bool = True, bytes_to_str: bool =...
    method to_dict (line 302) | def to_dict(self, include_bytes: bool = True, bytes_to_str: bool = Fal...
  class DataRecordSet (line 343) | class DataRecordSet:
    method __init__ (line 351) | def __init__(
    method get_total_cost (line 378) | def get_total_cost(self) -> float:
    method get_field_to_score_fn (line 381) | def get_field_to_score_fn(self) -> dict[str, str | callable]:
    method __getitem__ (line 384) | def __getitem__(self, slice) -> DataRecord | list[DataRecord]:
    method __len__ (line 387) | def __len__(self) -> int:
    method __iter__ (line 390) | def __iter__(self) -> Generator[DataRecord]:
  class DataRecordCollection (line 394) | class DataRecordCollection:
    method __init__ (line 410) | def __init__(self, data_records: list[DataRecord], execution_stats: Ex...
    method __iter__ (line 416) | def __iter__(self) -> Generator[DataRecord]:
    method __len__ (line 420) | def __len__(self):
    method to_df (line 424) | def to_df(self, cols: list[str] | None = None):
    method _get_executed_plans (line 427) | def _get_executed_plans(self):

FILE: src/palimpzest/core/lib/schemas.py
  function get_schema_field_names (line 60) | def get_schema_field_names(schema: type[BaseModel], id: str | None = Non...
  function _create_pickleable_model (line 65) | def _create_pickleable_model(fields: dict[str, tuple[type, FieldInfo]]) ...
  function relax_schema (line 90) | def relax_schema(model: type[BaseModel]) -> type[BaseModel]:
  function project (line 99) | def project(model: type[BaseModel], project_fields: list[str]) -> type[B...
  function create_schema_from_fields (line 114) | def create_schema_from_fields(fields: list[dict]) -> type[BaseModel]:
  function create_schema_from_df (line 132) | def create_schema_from_df(df: pd.DataFrame) -> type[BaseModel]:
  function union_schemas (line 145) | def union_schemas(models: list[type[BaseModel]], join: bool = False, on:...
  class DefaultSchema (line 181) | class DefaultSchema(BaseModel):
  class Download (line 185) | class Download(BaseModel):
  class File (line 191) | class File(BaseModel):
  class TextFile (line 200) | class TextFile(BaseModel):
  class Average (line 205) | class Average(BaseModel):
  class Count (line 208) | class Count(BaseModel):
  class Sum (line 211) | class Sum(BaseModel):
  class Min (line 214) | class Min(BaseModel):
  class Max (line 217) | class Max(BaseModel):
  class OperatorDerivedSchema (line 220) | class OperatorDerivedSchema(BaseModel):
  class Table (line 223) | class Table(BaseModel):
  class URL (line 230) | class URL(BaseModel):
  class WebPage (line 234) | class WebPage(BaseModel):
  class ImageFile (line 242) | class ImageFile(File):
  class AudioFile (line 246) | class AudioFile(File):
  class PDFFile (line 250) | class PDFFile(File):
  class XLSFile (line 255) | class XLSFile(File):
  class EquationImage (line 261) | class EquationImage(ImageFile):
  class PlotImage (line 265) | class PlotImage(ImageFile):

FILE: src/palimpzest/core/models.py
  class GenerationStats (line 11) | class GenerationStats(BaseModel):
    method __iadd__ (line 59) | def __iadd__(self, other: GenerationStats) -> GenerationStats:
    method __add__ (line 66) | def __add__(self, other: GenerationStats) -> GenerationStats:
    method __itruediv__ (line 76) | def __itruediv__(self, quotient: float) -> GenerationStats:
    method __truediv__ (line 87) | def __truediv__(self, quotient: float) -> GenerationStats:
    method __radd__ (line 100) | def __radd__(self, other: int) -> GenerationStats:
    method to_json (line 106) | def to_json(self, filepath: str | None = None) -> dict | None:
  class RecordOpStats (line 114) | class RecordOpStats(BaseModel):
  class OperatorStats (line 227) | class OperatorStats(BaseModel):
    method __iadd__ (line 280) | def __iadd__(self, stats: OperatorStats | RecordOpStats) -> OperatorSt...
  class BasePlanStats (line 323) | class BasePlanStats(BaseModel):
    method start (line 385) | def start(self) -> None:
    method finish (line 389) | def finish(self) -> None:
    method from_plan (line 405) | def from_plan(plan) -> BasePlanStats:
    method sum_op_stats_field (line 412) | def sum_op_stats_field(self, field_name: str) -> float | int:
    method sum_validation_stats_field (line 416) | def sum_validation_stats_field(self, field_name: str) -> float | int:
    method add_record_op_stats (line 421) | def add_record_op_stats(self, unique_full_op_id: str, record_op_stats:...
    method __iadd__ (line 428) | def __iadd__(self, plan_stats: BasePlanStats) -> None:
    method __str__ (line 435) | def __str__(self) -> str:
    method get_total_cost_so_far (line 441) | def get_total_cost_so_far(self) -> float:
  class PlanStats (line 448) | class PlanStats(BasePlanStats):
    method from_plan (line 453) | def from_plan(plan) -> PlanStats:
    method sum_op_stats_field (line 471) | def sum_op_stats_field(self, field_name: str) -> float | int:
    method add_record_op_stats (line 475) | def add_record_op_stats(self, unique_full_op_id: str, record_op_stats:...
    method __iadd__ (line 489) | def __iadd__(self, plan_stats: PlanStats) -> None:
    method __str__ (line 512) | def __str__(self) -> str:
  class SentinelPlanStats (line 527) | class SentinelPlanStats(BasePlanStats):
    method from_plan (line 532) | def from_plan(plan) -> SentinelPlanStats:
    method sum_op_stats_field (line 552) | def sum_op_stats_field(self, field_name: str) -> float | int:
    method add_record_op_stats (line 556) | def add_record_op_stats(self, unique_logical_op_id: str, record_op_sta...
    method add_validation_gen_stats (line 574) | def add_validation_gen_stats(self, unique_logical_op_id: str, gen_stat...
    method __iadd__ (line 583) | def __iadd__(self, plan_stats: SentinelPlanStats) -> None:
    method __str__ (line 616) | def __str__(self) -> str:
  class ExecutionStats (line 635) | class ExecutionStats(BaseModel):
    method start (line 700) | def start(self) -> None:
    method finish_optimization (line 704) | def finish_optimization(self) -> None:
    method finish (line 717) | def finish(self) -> None:
    method sum_plan_stats_field (line 747) | def sum_plan_stats_field(self, field_name: str) -> float | int:
    method sum_sentinel_plan_costs (line 755) | def sum_sentinel_plan_costs(self) -> float:
    method sum_plan_costs (line 764) | def sum_plan_costs(self) -> float:
    method add_plan_stats (line 770) | def add_plan_stats(self, plan_stats: PlanStats | SentinelPlanStats | l...
    method to_json (line 794) | def to_json(self, filepath: str | None = None) -> dict | None:
  class OperatorCostEstimates (line 802) | class OperatorCostEstimates(BaseModel):
    method __rmul__ (line 843) | def __rmul__(self, multiplier: float) -> OperatorCostEstimates:
    method model_post_init (line 850) | def model_post_init(self, __context: Any) -> None:
  class PlanCost (line 868) | class PlanCost(BaseModel):
    method __hash__ (line 903) | def __hash__(self):
    method __eq__ (line 906) | def __eq__(self, other: Any) -> bool:
    method model_post_init (line 915) | def model_post_init(self, __context: Any) -> None:
    method join_add (line 928) | def join_add(self, left_plan_cost: PlanCost, right_plan_cost: PlanCost...
    method __iadd__ (line 969) | def __iadd__(self, other: PlanCost) -> PlanCost:
    method __add__ (line 989) | def __add__(self, other: PlanCost) -> PlanCost:

FILE: src/palimpzest/policy.py
  function construct_policy_from_kwargs (line 8) | def construct_policy_from_kwargs(**kwargs) -> Policy | None:
  class Policy (line 64) | class Policy:
    method __init__ (line 73) | def __init__(self):
    method get_primary_metric (line 76) | def get_primary_metric(self) -> str:
    method get_dict (line 87) | def get_dict(self) -> dict:
    method constraint (line 94) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 101) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
    method to_json_str (line 107) | def to_json_str(self) -> str:
  class MaxQuality (line 115) | class MaxQuality(Policy):
    method __str__ (line 121) | def __str__(self):
    method get_primary_metric (line 124) | def get_primary_metric(self) -> str:
    method get_dict (line 127) | def get_dict(self) -> dict:
    method constraint (line 130) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 134) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinCost (line 147) | class MinCost(Policy):
    method __str__ (line 153) | def __str__(self):
    method get_primary_metric (line 156) | def get_primary_metric(self) -> str:
    method get_dict (line 159) | def get_dict(self) -> dict:
    method constraint (line 162) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 166) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinTime (line 179) | class MinTime(Policy):
    method __str__ (line 185) | def __str__(self):
    method get_primary_metric (line 188) | def get_primary_metric(self) -> str:
    method get_dict (line 191) | def get_dict(self) -> dict:
    method constraint (line 194) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 198) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MaxQualityAtFixedCost (line 211) | class MaxQualityAtFixedCost(Policy):
    method __init__ (line 217) | def __init__(self, max_cost: float):
    method __str__ (line 220) | def __str__(self):
    method get_primary_metric (line 223) | def get_primary_metric(self) -> str:
    method get_dict (line 226) | def get_dict(self) -> dict:
    method constraint (line 229) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 232) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MaxQualityAtFixedTime (line 245) | class MaxQualityAtFixedTime(Policy):
    method __init__ (line 251) | def __init__(self, max_time: float):
    method __str__ (line 254) | def __str__(self):
    method get_primary_metric (line 257) | def get_primary_metric(self) -> str:
    method get_dict (line 260) | def get_dict(self) -> dict:
    method constraint (line 263) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 266) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinCostAtFixedQuality (line 279) | class MinCostAtFixedQuality(Policy):
    method __init__ (line 285) | def __init__(self, min_quality: float):
    method __str__ (line 288) | def __str__(self):
    method get_primary_metric (line 291) | def get_primary_metric(self) -> str:
    method get_dict (line 294) | def get_dict(self) -> dict:
    method constraint (line 297) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 300) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinTimeAtFixedQuality (line 313) | class MinTimeAtFixedQuality(Policy):
    method __init__ (line 319) | def __init__(self, min_quality: float):
    method __str__ (line 322) | def __str__(self):
    method get_primary_metric (line 325) | def get_primary_metric(self) -> str:
    method get_dict (line 328) | def get_dict(self) -> dict:
    method constraint (line 331) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 334) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:

FILE: src/palimpzest/prompts/prompt_factory.py
  function _detect_image_media_type (line 144) | def _detect_image_media_type(filepath: str | None = None, base64_data: s...
  class PromptFactory (line 170) | class PromptFactory:
    method __init__ (line 232) | def __init__(self, prompt_strategy: PromptStrategy, model: Model, card...
    method _get_context (line 238) | def _get_context(self, candidate: DataRecord | list[DataRecord], input...
    method _get_input_fields (line 289) | def _get_input_fields(self, candidate: DataRecord, **kwargs) -> list[s...
    method _get_input_modalities (line 308) | def _get_input_modalities(self, candidate: DataRecord, input_fields: l...
    method _get_modalities_str (line 331) | def _get_modalities_str(self, input_modalities: set[Modality]) -> str:
    method _get_input_fields_desc (line 356) | def _get_input_fields_desc(self, candidate: DataRecord, input_fields: ...
    method _get_output_fields_desc (line 372) | def _get_output_fields_desc(self, output_fields: list[str], **kwargs) ...
    method _get_agg_instruction (line 395) | def _get_agg_instruction(self, **kwargs) -> str | None:
    method _get_filter_condition (line 408) | def _get_filter_condition(self, **kwargs) -> str | None:
    method _get_join_condition (line 421) | def _get_join_condition(self, **kwargs) -> str | None:
    method _get_original_output (line 434) | def _get_original_output(self, **kwargs) -> str | None:
    method _get_critique_output (line 452) | def _get_critique_output(self, **kwargs) -> str | None:
    method _get_model_responses (line 468) | def _get_model_responses(self, **kwargs) -> str | None:
    method _get_chunk_outputs (line 487) | def _get_chunk_outputs(self, **kwargs) -> str | None:
    method _get_output_format_instruction (line 506) | def _get_output_format_instruction(self) -> str:
    method _get_job_instruction (line 519) | def _get_job_instruction(self, input_modalities: set[Modality]) -> str...
    method _get_desc_section (line 549) | def _get_desc_section(self) -> str:
    method _get_critique_criteria (line 562) | def _get_critique_criteria(self) -> str | None:
    method _get_refinement_criteria (line 575) | def _get_refinement_criteria(self) -> str | None:
    method _get_finish_instruction (line 588) | def _get_finish_instruction(self) -> str | None:
    method _get_example_input_fields (line 603) | def _get_example_input_fields(self, input_modalities: set[Modality], r...
    method _get_example_output_fields (line 627) | def _get_example_output_fields(self, input_modalities: set[Modality]) ...
    method _get_example_context (line 650) | def _get_example_context(self, input_modalities: set[Modality], right:...
    method _get_image_disclaimer (line 684) | def _get_image_disclaimer(self, input_modalities: set[Modality], right...
    method _get_audio_disclaimer (line 697) | def _get_audio_disclaimer(self, input_modalities: set[Modality], right...
    method _get_example_reasoning (line 710) | def _get_example_reasoning(self, input_modalities: set[Modality]) -> str:
    method _get_example_answer (line 737) | def _get_example_answer(self, input_modalities: set[Modality]) -> str:
    method _get_all_format_kwargs (line 763) | def _get_all_format_kwargs(
    method _create_audio_messages (line 837) | def _create_audio_messages(self, candidate: DataRecord | list[DataReco...
    method _create_image_messages (line 893) | def _create_image_messages(self, candidate: DataRecord | list[DataReco...
    method _get_system_prompt (line 963) | def _get_system_prompt(self, **format_kwargs) -> str | None:
    method _get_user_messages (line 980) | def _get_user_messages(self, candidate: DataRecord | list[DataRecord],...
    method create_messages (line 1074) | def create_messages(self, candidate: DataRecord | list[DataRecord], ou...

FILE: src/palimpzest/prompts/prompt_manager.py
  class PromptManager (line 17) | class PromptManager:
    method __init__ (line 30) | def __init__(self, model: Model):
    method get_cache_kwargs (line 35) | def get_cache_kwargs(self) -> dict[str, Any]:
    method inject_cache_isolation_id (line 51) | def inject_cache_isolation_id(self, messages: list[dict], session_id: ...
    method update_messages_for_caching (line 65) | def update_messages_for_caching(self, messages: list[dict]) -> list[di...
    method extract_usage_stats (line 92) | def extract_usage_stats(self, usage: dict, is_audio_op: bool) -> dict[...
    method _remove_cache_boundary_markers (line 149) | def _remove_cache_boundary_markers(self, messages: list[dict]) -> list...
    method _transform_messages_for_anthropic (line 173) | def _transform_messages_for_anthropic(self, messages: list[dict]) -> l...

FILE: src/palimpzest/query/execution/all_sample_execution_strategy.py
  class OpSet (line 20) | class OpSet:
    method __init__ (line 29) | def __init__(self, op_set: list[PhysicalOperator], source_unique_logic...
    method get_op_inputs (line 48) | def get_op_inputs(self) -> list[PhysicalOperator, DataRecord | int | N...
    method pick_highest_quality_output (line 103) | def pick_highest_quality_output(self, record_sets: list[DataRecordSet]...
    method update_inputs (line 141) | def update_inputs(self, source_idx_to_record_sets: dict[int, DataRecor...
  class AllSamplingExecutionStrategy (line 153) | class AllSamplingExecutionStrategy(SentinelExecutionStrategy):
    method _execute_sentinel_plan (line 155) | def _execute_sentinel_plan(self,
    method execute_sentinel_plan (line 208) | def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dic...

FILE: src/palimpzest/query/execution/execution_strategy.py
  class BaseExecutionStrategy (line 25) | class BaseExecutionStrategy:
    method __init__ (line 26) | def __init__(self,
  class ExecutionStrategy (line 43) | class ExecutionStrategy(BaseExecutionStrategy, ABC):
    method __init__ (line 46) | def __init__(self, *args, **kwargs):
    method execute_plan (line 52) | def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], ...
    method _create_input_queues (line 56) | def _create_input_queues(self, plan: PhysicalPlan) -> dict[str, dict[s...
  class SentinelExecutionStrategy (line 77) | class SentinelExecutionStrategy(BaseExecutionStrategy, ABC):
    method __init__ (line 83) | def __init__(
    method _score_quality (line 117) | def _score_quality(
    method _execute_op_set (line 275) | def _execute_op_set(self, unique_logical_op_id: str, op_inputs: list[t...
    method _is_llm_op (line 345) | def _is_llm_op(self, physical_op: PhysicalOperator) -> bool:
    method execute_sentinel_plan (line 353) | def execute_sentinel_plan(self, sentinel_plan: SentinelPlan, train_dat...

FILE: src/palimpzest/query/execution/execution_strategy_type.py
  class ExecutionStrategyType (line 12) | class ExecutionStrategyType(Enum):
    method is_fully_parallel (line 18) | def is_fully_parallel(self) -> bool:
  class SentinelExecutionStrategyType (line 22) | class SentinelExecutionStrategyType(Enum):

FILE: src/palimpzest/query/execution/mab_execution_strategy.py
  class OpFrontier (line 27) | class OpFrontier:
    method __init__ (line 36) | def __init__(
    method get_frontier_ops (line 96) | def get_frontier_ops(self) -> list[PhysicalOperator]:
    method get_off_frontier_ops (line 102) | def get_off_frontier_ops(self) -> list[PhysicalOperator]:
    method _compute_op_id_to_pareto_distance (line 108) | def _compute_op_id_to_pareto_distance(self, priors: dict[str, dict[str...
    method _compute_naive_priors (line 168) | def _compute_naive_priors(self, op_set: list[PhysicalOperator]) -> dic...
    method _get_op_index_order (line 191) | def _get_op_index_order(self, op_set: list[PhysicalOperator], seed: in...
    method _get_op_source_indices_pairs (line 258) | def _get_op_source_indices_pairs(self) -> list[tuple[PhysicalOperator,...
    method get_source_indices_for_next_iteration (line 282) | def get_source_indices_for_next_iteration(self) -> set[tuple[str]]:
    method get_frontier_op_inputs (line 289) | def get_frontier_op_inputs(self, source_indices_to_sample: set[tuple[s...
    method update_frontier (line 369) | def update_frontier(self, unique_logical_op_id: str, plan_stats: Senti...
    method pick_highest_quality_output (line 581) | def pick_highest_quality_output(self, record_sets: list[DataRecordSet]...
    method update_inputs (line 619) | def update_inputs(self, source_unique_logical_op_id: str, source_indic...
  class MABExecutionStrategy (line 631) | class MABExecutionStrategy(SentinelExecutionStrategy):
    method _remove_filtered_records_from_downstream_ops (line 639) | def _remove_filtered_records_from_downstream_ops(self, topo_idx: int, ...
    method _get_max_quality_op (line 661) | def _get_max_quality_op(self, unique_logical_op_id: str, op_frontiers:...
    method _compute_termination_condition (line 692) | def _compute_termination_condition(self, samples_drawn: int, sampling_...
    method _execute_sentinel_plan (line 695) | def _execute_sentinel_plan(
    method execute_sentinel_plan (line 791) | def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dic...

FILE: src/palimpzest/query/execution/parallel_execution_strategy.py
  class ParallelExecutionStrategy (line 19) | class ParallelExecutionStrategy(ExecutionStrategy):
    method __init__ (line 24) | def __init__(self, *args, **kwargs):
    method _any_queue_not_empty (line 27) | def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dic...
    method _upstream_ops_finished (line 37) | def _upstream_ops_finished(self, plan: PhysicalPlan, unique_full_op_id...
    method _finish_outer_join (line 44) | def _finish_outer_join(self, executor: ThreadPoolExecutor, plan: Physi...
    method _process_future_results (line 58) | def _process_future_results(self, unique_full_op_id: str, future_queue...
    method _execute_plan (line 101) | def _execute_plan(
    method execute_plan (line 235) | def execute_plan(self, plan: PhysicalPlan):

FILE: src/palimpzest/query/execution/single_threaded_execution_strategy.py
  class SequentialSingleThreadExecutionStrategy (line 15) | class SequentialSingleThreadExecutionStrategy(ExecutionStrategy):
    method __init__ (line 25) | def __init__(self, *args, **kwargs):
    method _execute_plan (line 29) | def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, di...
    method execute_plan (line 116) | def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], ...
  class PipelinedSingleThreadExecutionStrategy (line 149) | class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy):
    method __init__ (line 163) | def __init__(self, *args, **kwargs):
    method _any_queue_not_empty (line 167) | def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dic...
    method _upstream_ops_finished (line 177) | def _upstream_ops_finished(self, plan: PhysicalPlan, unique_full_op_id...
    method _execute_plan (line 184) | def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, di...
    method execute_plan (line 284) | def execute_plan(self, plan: PhysicalPlan):

FILE: src/palimpzest/query/generators/gemini_client.py
  class GeminiResponse (line 24) | class GeminiResponse:
  class GeminiClient (line 31) | class GeminiClient:
    method get_instance (line 57) | def get_instance(cls, model: str, use_vertex: bool = False) -> GeminiC...
    method __init__ (line 64) | def __init__(self, model: str, use_vertex: bool = False):
    method _detect_image_media_type (line 70) | def _detect_image_media_type(self, base64_data: str) -> str:
    method _transform_messages (line 86) | def _transform_messages(self, messages: list[dict]) -> tuple[str | Non...
    method _extract_usage_stats (line 182) | def _extract_usage_stats(self, usage_metadata: Any) -> dict:
    method generate (line 243) | def generate(

FILE: src/palimpzest/query/generators/generators.py
  function get_json_from_answer (line 34) | def get_json_from_answer(answer: str, model: Model, cardinality: Cardina...
  class Generator (line 98) | class Generator(Generic[ContextType, InputType]):
    method __init__ (line 103) | def __init__(
    method _parse_reasoning (line 131) | def _parse_reasoning(self, completion_text: str, **kwargs) -> str:
    method _prepare_field_answers (line 149) | def _prepare_field_answers(self, field_answers: dict | list[dict], fie...
    method _check_convert_answer_text (line 171) | def _check_convert_answer_text(self, answer_text: str, fields: dict[st...
    method _check_bool_answer_text (line 188) | def _check_bool_answer_text(self, answer_text: str, throw_exception: b...
    method _parse_convert_answer (line 206) | def _parse_convert_answer(self, completion_text: str, fields: dict[str...
    method _parse_bool_answer (line 245) | def _parse_bool_answer(self, completion_text: str, json_output: bool) ...
    method _parse_answer (line 284) | def _parse_answer(self, completion_text: str, fields: dict[str, FieldI...
    method __call__ (line 303) | def __call__(self, candidate: DataRecord | list[DataRecord], fields: d...

FILE: src/palimpzest/query/operators/aggregate.py
  class AggregateOp (line 23) | class AggregateOp(PhysicalOperator):
    method __call__ (line 29) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class ApplyGroupByOp (line 33) | class ApplyGroupByOp(AggregateOp):
    method __init__ (line 39) | def __init__(self, group_by_sig: GroupBySig, *args, **kwargs):
    method __str__ (line 43) | def __str__(self):
    method get_id_params (line 48) | def get_id_params(self):
    method get_op_params (line 52) | def get_op_params(self):
    method naive_cost_estimates (line 56) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method agg_init (line 66) | def agg_init(func):
    method agg_merge (line 85) | def agg_merge(func, state, val):
    method agg_final (line 115) | def agg_final(func, state):
    method __call__ (line 124) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class AverageAggregateOp (line 192) | class AverageAggregateOp(AggregateOp):
    method __init__ (line 195) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 215) | def __str__(self):
    method get_id_params (line 220) | def get_id_params(self):
    method get_op_params (line 224) | def get_op_params(self):
    method naive_cost_estimates (line 228) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 237) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class SumAggregateOp (line 271) | class SumAggregateOp(AggregateOp):
    method __init__ (line 274) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 294) | def __str__(self):
    method get_id_params (line 299) | def get_id_params(self):
    method get_op_params (line 303) | def get_op_params(self):
    method naive_cost_estimates (line 307) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 316) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class CountAggregateOp (line 347) | class CountAggregateOp(AggregateOp):
    method __init__ (line 350) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 358) | def __str__(self):
    method get_id_params (line 363) | def get_id_params(self):
    method get_op_params (line 367) | def get_op_params(self):
    method naive_cost_estimates (line 371) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 380) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class MinAggregateOp (line 404) | class MinAggregateOp(AggregateOp):
    method __init__ (line 407) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 415) | def __str__(self):
    method get_id_params (line 420) | def get_id_params(self):
    method get_op_params (line 424) | def get_op_params(self):
    method naive_cost_estimates (line 428) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 437) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class MaxAggregateOp (line 467) | class MaxAggregateOp(AggregateOp):
    method __init__ (line 470) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 478) | def __str__(self):
    method get_id_params (line 483) | def get_id_params(self):
    method get_op_params (line 487) | def get_op_params(self):
    method naive_cost_estimates (line 491) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 500) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class SemanticAggregate (line 531) | class SemanticAggregate(AggregateOp):
    method __init__ (line 533) | def __init__(self, agg_str: str, model: Model, prompt_strategy: Prompt...
    method __str__ (line 543) | def __str__(self):
    method get_id_params (line 550) | def get_id_params(self):
    method get_op_params (line 562) | def get_op_params(self):
    method get_model_name (line 574) | def get_model_name(self) -> str:
    method naive_cost_estimates (line 577) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 611) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:

FILE: src/palimpzest/query/operators/compute.py
  function make_tool (line 17) | def make_tool(bound_method):
  class SmolAgentsCompute (line 38) | class SmolAgentsCompute(PhysicalOperator):
    method __init__ (line 41) | def __init__(self, context_id: str, instruction: str, additional_conte...
    method __str__ (line 52) | def __str__(self):
    method get_id_params (line 59) | def get_id_params(self):
    method get_op_params (line 68) | def get_op_params(self):
    method naive_cost_estimates (line 77) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method _create_record_set (line 85) | def _create_record_set(
    method __call__ (line 129) | def __call__(self, candidate: DataRecord) -> Any:

FILE: src/palimpzest/query/operators/convert.py
  class ConvertOp (line 23) | class ConvertOp(PhysicalOperator, ABC):
    method __init__ (line 24) | def __init__(
    method get_id_params (line 37) | def get_id_params(self):
    method get_op_params (line 48) | def get_op_params(self):
    method _create_data_records_from_field_answers (line 59) | def _create_data_records_from_field_answers(
    method _create_record_set (line 90) | def _create_record_set(
    method convert (line 142) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
    method __call__ (line 163) | def __call__(self, candidate: DataRecord) -> DataRecordSet:
  class NonLLMConvert (line 198) | class NonLLMConvert(ConvertOp):
    method __str__ (line 199) | def __str__(self):
    method naive_cost_estimates (line 204) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method convert (line 224) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class LLMConvert (line 262) | class LLMConvert(ConvertOp):
    method __init__ (line 267) | def __init__(
    method __str__ (line 282) | def __str__(self):
    method get_id_params (line 288) | def get_id_params(self):
    method get_op_params (line 299) | def get_op_params(self):
    method get_model_name (line 310) | def get_model_name(self):
    method naive_cost_estimates (line 313) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
  class LLMConvertBonded (line 352) | class LLMConvertBonded(LLMConvert):
    method convert (line 354) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...

FILE: src/palimpzest/query/operators/critique_and_refine.py
  class CritiqueAndRefineConvert (line 18) | class CritiqueAndRefineConvert(LLMConvert):
    method __init__ (line 20) | def __init__(
    method __str__ (line 35) | def __str__(self):
    method get_id_params (line 41) | def get_id_params(self):
    method get_op_params (line 51) | def get_op_params(self):
    method naive_cost_estimates (line 61) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method convert (line 81) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class CritiqueAndRefineFilter (line 106) | class CritiqueAndRefineFilter(LLMFilter):
    method __init__ (line 108) | def __init__(
    method __str__ (line 123) | def __str__(self):
    method get_id_params (line 129) | def get_id_params(self):
    method get_op_params (line 139) | def get_op_params(self):
    method naive_cost_estimates (line 149) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 169) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/distinct.py
  class DistinctOp (line 8) | class DistinctOp(PhysicalOperator):
    method __init__ (line 9) | def __init__(self, distinct_cols: list[str], distinct_seen: set | None...
    method __str__ (line 14) | def __str__(self):
    method get_id_params (line 19) | def get_id_params(self):
    method get_op_params (line 23) | def get_op_params(self):
    method naive_cost_estimates (line 27) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 36) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/filter.py
  class FilterOp (line 23) | class FilterOp(PhysicalOperator, ABC):
    method __init__ (line 24) | def __init__(self, filter: Filter, desc: str | None = None, *args, **k...
    method __str__ (line 30) | def __str__(self):
    method get_id_params (line 35) | def get_id_params(self):
    method get_op_params (line 39) | def get_op_params(self):
    method filter (line 44) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...
    method _create_record_set (line 60) | def _create_record_set(
    method __call__ (line 107) | def __call__(self, candidate: DataRecord) -> DataRecordSet:
  class NonLLMFilter (line 125) | class NonLLMFilter(FilterOp):
    method naive_cost_estimates (line 127) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 143) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...
  class LLMFilter (line 165) | class LLMFilter(FilterOp):
    method __init__ (line 166) | def __init__(
    method get_id_params (line 181) | def get_id_params(self):
    method get_op_params (line 192) | def get_op_params(self):
    method get_model_name (line 203) | def get_model_name(self):
    method naive_cost_estimates (line 206) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 247) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/join.py
  class Singleton (line 29) | class Singleton:
    method __new__ (line 30) | def __new__(cls, *args, **kw):
  class Locks (line 36) | class Locks(Singleton):
    method get_model (line 42) | def get_model(cls, model_name: str):
  function compute_similarity (line 48) | def compute_similarity(left_embedding: list[float], right_embedding: lis...
  class JoinOp (line 55) | class JoinOp(PhysicalOperator, ABC):
    method __init__ (line 56) | def __init__(
    method __str__ (line 86) | def __str__(self):
    method get_id_params (line 93) | def get_id_params(self):
    method get_op_params (line 105) | def get_op_params(self):
    method _compute_unmatched_records (line 118) | def _compute_unmatched_records(self) -> DataRecordSet:
    method naive_cost_estimates (line 172) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method set_finished (line 175) | def set_finished(self):
  class RelationalJoin (line 179) | class RelationalJoin(JoinOp):
    method get_model_name (line 181) | def get_model_name(self):
    method _process_join_candidate_pair (line 184) | def _process_join_candidate_pair(self, left_candidate, right_candidate...
    method naive_cost_estimates (line 228) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method __call__ (line 243) | def __call__(self, left_candidates: list[DataRecord], right_candidates...
  class LLMJoin (line 290) | class LLMJoin(JoinOp):
    method __init__ (line 291) | def __init__(
    method __str__ (line 305) | def __str__(self):
    method get_id_params (line 312) | def get_id_params(self):
    method get_op_params (line 322) | def get_op_params(self):
    method get_model_name (line 332) | def get_model_name(self):
    method _process_join_candidate_pair (line 335) | def _process_join_candidate_pair(
  class NestedLoopsJoin (line 395) | class NestedLoopsJoin(LLMJoin):
    method naive_cost_estimates (line 397) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method __call__ (line 439) | def __call__(self, left_candidates: list[DataRecord], right_candidates...
  class EmbeddingJoin (line 492) | class EmbeddingJoin(LLMJoin):
    method __init__ (line 495) | def __init__(
    method __str__ (line 529) | def __str__(self):
    method get_id_params (line 535) | def get_id_params(self):
    method get_op_params (line 545) | def get_op_params(self):
    method naive_cost_estimates (line 555) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method _compute_embeddings (line 588) | def _compute_embeddings(self, candidates: list[DataRecord], input_fiel...
    method _process_join_candidate_pair (line 636) | def _process_join_candidate_pair(self, left_candidate, right_candidate...
    method _process_join_candidate_with_sim (line 640) | def _process_join_candidate_with_sim(self, left_candidate: DataRecord,...
    method __call__ (line 675) | def __call__(self, left_candidates: list[DataRecord], right_candidates...

FILE: src/palimpzest/query/operators/limit.py
  class LimitScanOp (line 8) | class LimitScanOp(PhysicalOperator):
    method __init__ (line 9) | def __init__(self, limit: int, *args, **kwargs):
    method __str__ (line 13) | def __str__(self):
    method get_id_params (line 18) | def get_id_params(self):
    method get_op_params (line 22) | def get_op_params(self):
    method naive_cost_estimates (line 26) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 35) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/logical.py
  class LogicalOperator (line 16) | class LogicalOperator:
    method __init__ (line 38) | def __init__(
    method __str__ (line 57) | def __str__(self) -> str:
    method __eq__ (line 60) | def __eq__(self, other) -> bool:
    method copy (line 64) | def copy(self) -> LogicalOperator:
    method logical_op_name (line 70) | def logical_op_name(self) -> str:
    method get_unique_logical_op_id (line 74) | def get_unique_logical_op_id(self) -> str:
    method set_unique_logical_op_id (line 80) | def set_unique_logical_op_id(self, unique_logical_op_id: str) -> None:
    method get_logical_id_params (line 87) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 99) | def get_logical_op_params(self) -> dict:
    method get_logical_op_id (line 112) | def get_logical_op_id(self):
    method get_generated_fields (line 134) | def get_generated_fields(self) -> list[str]:
    method __hash__ (line 138) | def __hash__(self):
  class Aggregate (line 144) | class Aggregate(LogicalOperator):
    method __init__ (line 150) | def __init__(
    method __str__ (line 176) | def __str__(self):
    method get_logical_id_params (line 180) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 190) | def get_logical_op_params(self) -> dict:
  class BaseScan (line 201) | class BaseScan(LogicalOperator):
    method __init__ (line 204) | def __init__(self, datasource: dataset.Dataset, output_schema: type[Ba...
    method __str__ (line 208) | def __str__(self):
    method __eq__ (line 211) | def __eq__(self, other) -> bool:
    method get_logical_id_params (line 219) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 228) | def get_logical_op_params(self) -> dict:
  class ContextScan (line 235) | class ContextScan(LogicalOperator):
    method __init__ (line 238) | def __init__(self, context: context.Context, output_schema: type[BaseM...
    method __str__ (line 242) | def __str__(self):
    method __eq__ (line 245) | def __eq__(self, other) -> bool:
    method get_logical_id_params (line 251) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 260) | def get_logical_op_params(self) -> dict:
  class ConvertScan (line 267) | class ConvertScan(LogicalOperator):
    method __init__ (line 270) | def __init__(
    method __str__ (line 283) | def __str__(self):
    method get_logical_id_params (line 286) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 297) | def get_logical_op_params(self) -> dict:
  class Distinct (line 309) | class Distinct(LogicalOperator):
    method __init__ (line 310) | def __init__(self, distinct_cols: list[str] | None, *args, **kwargs):
    method __str__ (line 324) | def __str__(self):
    method get_logical_id_params (line 327) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 333) | def get_logical_op_params(self) -> dict:
  class FilteredScan (line 343) | class FilteredScan(LogicalOperator):
    method __init__ (line 346) | def __init__(
    method __str__ (line 357) | def __str__(self):
    method get_logical_id_params (line 360) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 370) | def get_logical_op_params(self) -> dict:
  class GroupByAggregate (line 381) | class GroupByAggregate(LogicalOperator):
    method __init__ (line 382) | def __init__(
    method __str__ (line 396) | def __str__(self):
    method get_logical_id_params (line 399) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 405) | def get_logical_op_params(self) -> dict:
  class JoinOp (line 415) | class JoinOp(LogicalOperator):
    method __init__ (line 416) | def __init__(self, condition: str, on: list[str] | None = None, how: s...
    method __str__ (line 423) | def __str__(self):
    method get_logical_id_params (line 426) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 438) | def get_logical_op_params(self) -> dict:
  class LimitScan (line 451) | class LimitScan(LogicalOperator):
    method __init__ (line 452) | def __init__(self, limit: int, *args, **kwargs):
    method __str__ (line 456) | def __str__(self):
    method get_logical_id_params (line 459) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 465) | def get_logical_op_params(self) -> dict:
  class Project (line 475) | class Project(LogicalOperator):
    method __init__ (line 476) | def __init__(self, project_cols: list[str], *args, **kwargs):
    method __str__ (line 480) | def __str__(self):
    method get_logical_id_params (line 483) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 489) | def get_logical_op_params(self) -> dict:
  class TopKScan (line 499) | class TopKScan(LogicalOperator):
    method __init__ (line 502) | def __init__(
    method __str__ (line 519) | def __str__(self):
    method get_logical_id_params (line 522) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 536) | def get_logical_op_params(self) -> dict:
  class ComputeOperator (line 550) | class ComputeOperator(LogicalOperator):
    method __init__ (line 556) | def __init__(self, context_id: str, instruction: str, *args, **kwargs):
    method __str__ (line 561) | def __str__(self):
    method get_logical_id_params (line 564) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 574) | def get_logical_op_params(self) -> dict:
  class SearchOperator (line 585) | class SearchOperator(LogicalOperator):
    method __init__ (line 591) | def __init__(self, context_id: str, search_query: str, *args, **kwargs):
    method __str__ (line 596) | def __str__(self):
    method get_logical_id_params (line 599) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 609) | def get_logical_op_params(self) -> dict:

FILE: src/palimpzest/query/operators/mixture_of_agents.py
  class MixtureOfAgentsConvert (line 16) | class MixtureOfAgentsConvert(LLMConvert):
    method __init__ (line 18) | def __init__(
    method __str__ (line 41) | def __str__(self):
    method get_id_params (line 48) | def get_id_params(self):
    method get_op_params (line 59) | def get_op_params(self):
    method naive_cost_estimates (line 70) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method convert (line 106) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class MixtureOfAgentsFilter (line 133) | class MixtureOfAgentsFilter(LLMFilter):
    method __init__ (line 135) | def __init__(
    method __str__ (line 158) | def __str__(self):
    method get_id_params (line 165) | def get_id_params(self):
    method get_op_params (line 176) | def get_op_params(self):
    method naive_cost_estimates (line 187) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 221) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/physical.py
  class PhysicalOperator (line 14) | class PhysicalOperator:
    method __init__ (line 21) | def __init__(
    method __str__ (line 71) | def __str__(self):
    method __eq__ (line 82) | def __eq__(self, other) -> bool:
    method copy (line 85) | def copy(self) -> PhysicalOperator:
    method op_name (line 88) | def op_name(self) -> str:
    method get_id_params (line 92) | def get_id_params(self) -> dict:
    method get_op_params (line 106) | def get_op_params(self) -> dict:
    method get_op_id (line 123) | def get_op_id(self):
    method get_logical_op_id (line 148) | def get_logical_op_id(self) -> str:
    method get_unique_logical_op_id (line 151) | def get_unique_logical_op_id(self) -> str:
    method get_full_op_id (line 154) | def get_full_op_id(self):
    method is_image_op (line 157) | def is_image_op(self) -> bool:
    method is_audio_op (line 161) | def is_audio_op(self) -> bool:
    method __hash__ (line 165) | def __hash__(self):
    method get_model_name (line 168) | def get_model_name(self) -> str | None:
    method get_input_fields (line 172) | def get_input_fields(self):
    method get_fields_to_generate (line 187) | def get_fields_to_generate(self, candidate: DataRecord) -> list[str]:
    method naive_cost_estimates (line 204) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 224) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/project.py
  class ProjectOp (line 8) | class ProjectOp(PhysicalOperator):
    method __init__ (line 9) | def __init__(self, project_cols: list[str], *args, **kwargs):
    method __str__ (line 13) | def __str__(self):
    method get_id_params (line 18) | def get_id_params(self):
    method get_op_params (line 22) | def get_op_params(self):
    method naive_cost_estimates (line 26) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 35) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/rag.py
  class RAGConvert (line 18) | class RAGConvert(LLMConvert):
    method __init__ (line 19) | def __init__(self, embedding_model: Model, num_chunks_per_field: int, ...
    method __str__ (line 28) | def __str__(self):
    method get_id_params (line 35) | def get_id_params(self):
    method get_op_params (line 45) | def get_op_params(self):
    method naive_cost_estimates (line 55) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method chunk_text (line 84) | def chunk_text(self, text: str, chunk_size: int) -> list[str]:
    method compute_embedding (line 99) | def compute_embedding(self, text: str) -> tuple[list[float], Generatio...
    method compute_similarity (line 129) | def compute_similarity(self, query_embedding: list[float], chunk_embed...
    method get_chunked_candidate (line 135) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method convert (line 195) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class RAGFilter (line 229) | class RAGFilter(LLMFilter):
    method __init__ (line 230) | def __init__(self, embedding_model: Model, num_chunks_per_field: int, ...
    method __str__ (line 239) | def __str__(self):
    method get_id_params (line 246) | def get_id_params(self):
    method get_op_params (line 256) | def get_op_params(self):
    method naive_cost_estimates (line 266) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method chunk_text (line 295) | def chunk_text(self, text: str, chunk_size: int) -> list[str]:
    method compute_embedding (line 310) | def compute_embedding(self, text: str) -> tuple[list[float], Generatio...
    method compute_similarity (line 340) | def compute_similarity(self, query_embedding: list[float], chunk_embed...
    method get_chunked_candidate (line 346) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method filter (line 402) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/scan.py
  class ScanPhysicalOp (line 14) | class ScanPhysicalOp(PhysicalOperator, ABC):
    method __init__ (line 21) | def __init__(self, datasource: Any, *args, **kwargs):
    method __str__ (line 25) | def __str__(self):
    method get_id_params (line 30) | def get_id_params(self):
    method get_op_params (line 34) | def get_op_params(self):
    method naive_cost_estimates (line 39) | def naive_cost_estimates(
    method __call__ (line 60) | def __call__(self, idx: int) -> DataRecordSet:
  class MarshalAndScanDataOp (line 95) | class MarshalAndScanDataOp(ScanPhysicalOp):
    method naive_cost_estimates (line 96) | def naive_cost_estimates(
  class ContextScanOp (line 127) | class ContextScanOp(PhysicalOperator):
    method __init__ (line 132) | def __init__(self, context: context.Context, *args, **kwargs):
    method __str__ (line 136) | def __str__(self):
    method get_id_params (line 141) | def get_id_params(self):
    method get_op_params (line 144) | def get_op_params(self):
    method naive_cost_estimates (line 148) | def naive_cost_estimates(
    method __call__ (line 166) | def __call__(self, *args, **kwargs) -> DataRecordSet:

FILE: src/palimpzest/query/operators/search.py
  function make_tool (line 18) | def make_tool(bound_method):
  class SmolAgentsSearch (line 39) | class SmolAgentsSearch(PhysicalOperator):
    method __init__ (line 43) | def __init__(self, context_id: str, search_query: str, *args, **kwargs):
    method __str__ (line 53) | def __str__(self):
    method get_id_params (line 59) | def get_id_params(self):
    method get_op_params (line 67) | def get_op_params(self):
    method naive_cost_estimates (line 75) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method _create_record_set (line 83) | def _create_record_set(
    method __call__ (line 127) | def __call__(self, candidate: DataRecord) -> Any:

FILE: src/palimpzest/query/operators/split.py
  class SplitConvert (line 20) | class SplitConvert(LLMConvert):
    method __init__ (line 21) | def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000,...
    method __str__ (line 32) | def __str__(self):
    method get_id_params (line 38) | def get_id_params(self):
    method get_op_params (line 44) | def get_op_params(self):
    method naive_cost_estimates (line 48) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method get_text_chunks (line 77) | def get_text_chunks(self, text: str, num_chunks: int) -> list[str]:
    method get_chunked_candidate (line 93) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method convert (line 138) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class SplitFilter (line 170) | class SplitFilter(LLMFilter):
    method __init__ (line 171) | def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000,...
    method __str__ (line 182) | def __str__(self):
    method get_id_params (line 188) | def get_id_params(self):
    method get_op_params (line 194) | def get_op_params(self):
    method naive_cost_estimates (line 198) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method get_text_chunks (line 227) | def get_text_chunks(self, text: str, num_chunks: int) -> list[str]:
    method get_chunked_candidate (line 243) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method filter (line 288) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/topk.py
  class Singleton (line 21) | class Singleton:
    method __new__ (line 22) | def __new__(cls, *args, **kw):
  class ClipModel (line 28) | class ClipModel(Singleton):
    method get_model (line 33) | def get_model(cls, model_name: str):
  class TopKOp (line 39) | class TopKOp(PhysicalOperator):
    method __init__ (line 40) | def __init__(
    method __str__ (line 80) | def __str__(self):
    method get_id_params (line 85) | def get_id_params(self):
    method get_op_params (line 97) | def get_op_params(self):
    method naive_cost_estimates (line 110) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method default_search_func (line 122) | def default_search_func(self, index: Collection, query: list[str] | li...
    method _create_record_set (line 157) | def _create_record_set(
    method __call__ (line 214) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/optimizer/cost_model.py
  class BaseCostModel (line 18) | class BaseCostModel:
    method __init__ (line 24) | def __init__(self):
    method get_costed_full_op_ids (line 32) | def get_costed_full_op_ids(self) -> set[str]:
    method __call__ (line 38) | def __call__(self, operator: PhysicalOperator) -> PlanCost:
  class SampleBasedCostModel (line 46) | class SampleBasedCostModel:
    method __init__ (line 49) | def __init__(
    method get_costed_full_op_ids (line 77) | def get_costed_full_op_ids(self):
    method _compute_operator_stats (line 80) | def _compute_operator_stats(self, sentinel_plan_stats: SentinelPlanSta...
    method _compute_naive_plan_cost (line 153) | def _compute_naive_plan_cost(self, operator: PhysicalOperator, source_...
    method __call__ (line 210) | def __call__(self, operator: PhysicalOperator, source_op_estimates: Op...

FILE: src/palimpzest/query/optimizer/optimizer.py
  class Optimizer (line 49) | class Optimizer:
    method __init__ (line 64) | def __init__(
    method update_cost_model (line 164) | def update_cost_model(self, cost_model: BaseCostModel):
    method get_physical_op_params (line 167) | def get_physical_op_params(self):
    method deepcopy_clean (line 176) | def deepcopy_clean(self):
    method update_strategy (line 195) | def update_strategy(self, optimizer_strategy: OptimizationStrategyType):
    method construct_group_tree (line 207) | def construct_group_tree(self, dataset: Dataset) -> tuple[int, dict[st...
    method convert_query_plan_to_group_tree (line 341) | def convert_query_plan_to_group_tree(self, dataset: Dataset) -> str:
    method heuristic_optimization (line 380) | def heuristic_optimization(self, group_id: int) -> None:
    method search_optimization_space (line 386) | def search_optimization_space(self, group_id: int) -> None:
    method optimize (line 415) | def optimize(self, dataset: Dataset) -> list[PhysicalPlan]:

FILE: src/palimpzest/query/optimizer/optimizer_strategy.py
  class OptimizationStrategy (line 13) | class OptimizationStrategy(ABC):
    method get_optimal_plans (line 15) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class GreedyStrategy (line 20) | class GreedyStrategy(OptimizationStrategy):
    method _get_greedy_physical_plan (line 21) | def _get_greedy_physical_plan(self, groups: dict, group_id: int) -> Ph...
    method get_optimal_plans (line 58) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class ParetoStrategy (line 66) | class ParetoStrategy(OptimizationStrategy):
    method _get_candidate_pareto_physical_plans (line 67) | def _get_candidate_pareto_physical_plans(self, groups: dict, group_id:...
    method get_optimal_plans (line 119) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class SentinelStrategy (line 143) | class SentinelStrategy(OptimizationStrategy):
    method _get_sentinel_plan (line 144) | def _get_sentinel_plan(self, groups: dict[str, Group], group_id: int) ...
    method get_optimal_plans (line 172) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class NoOptimizationStrategy (line 179) | class NoOptimizationStrategy(GreedyStrategy):

FILE: src/palimpzest/query/optimizer/optimizer_strategy_type.py
  class OptimizationStrategyType (line 11) | class OptimizationStrategyType(Enum):
    method no_transformation (line 21) | def no_transformation(self) -> bool:
    method is_pareto (line 27) | def is_pareto(self) -> bool:
    method is_not_pareto (line 33) | def is_not_pareto(self) -> bool:

FILE: src/palimpzest/query/optimizer/plan.py
  class Plan (line 14) | class Plan(ABC):
    method compute_plan_id (line 16) | def compute_plan_id(self) -> str:
    method __eq__ (line 20) | def __eq__(self, other) -> bool:
    method __hash__ (line 24) | def __hash__(self) -> int:
    method __repr__ (line 28) | def __repr__(self) -> str:
    method __str__ (line 32) | def __str__(self) -> str:
    method __getitem__ (line 36) | def __getitem__(self, slice) -> tuple:
    method __iter__ (line 40) | def __iter__(self) -> iter:
    method __len__ (line 44) | def __len__(self) -> int:
  class PhysicalPlan (line 47) | class PhysicalPlan(Plan):
    method __init__ (line 48) | def __init__(self, operator: PhysicalOperator, subplans: list[Physical...
    method compute_plan_id (line 70) | def compute_plan_id(self) -> str:
    method get_est_total_outputs (line 80) | def get_est_total_outputs(self, num_samples: int | None = None, curren...
    method _compute_next_unique_full_op_map (line 137) | def _compute_next_unique_full_op_map(self, next_map: dict[str, str | N...
    method get_next_unique_full_op_and_id (line 171) | def get_next_unique_full_op_and_id(self, topo_idx: int, operator: Phys...
    method get_next_unique_full_op_id (line 176) | def get_next_unique_full_op_id(self, topo_idx: int, operator: Physical...
    method _compute_upstream_unique_full_op_ids_map (line 182) | def _compute_upstream_unique_full_op_ids_map(self, upstream_map: dict[...
    method get_upstream_unique_full_op_ids (line 206) | def get_upstream_unique_full_op_ids(self, unique_full_op_id: str) -> l...
    method _compute_source_unique_full_op_ids_map (line 210) | def _compute_source_unique_full_op_ids_map(self, source_map: dict[str,...
    method get_source_unique_full_op_ids (line 234) | def get_source_unique_full_op_ids(self, topo_idx: int, operator: Physi...
    method __eq__ (line 239) | def __eq__(self, other):
    method __hash__ (line 242) | def __hash__(self):
    method __repr__ (line 245) | def __repr__(self) -> str:
    method _get_str (line 248) | def _get_str(self, idx: int = 0, indent: int = 0) -> str:
    method __str__ (line 256) | def __str__(self):
    method __getitem__ (line 259) | def __getitem__(self, slice):
    method __iter__ (line 263) | def __iter__(self):
    method __len__ (line 268) | def __len__(self):
    method _from_ops (line 272) | def _from_ops(cls, ops: list[PhysicalOperator], plan_cost: PlanCost | ...
  class SentinelPlan (line 290) | class SentinelPlan(Plan):
    method __init__ (line 291) | def __init__(self, operator_set: list[PhysicalOperator], subplans: lis...
    method compute_plan_id (line 311) | def compute_plan_id(self) -> str:
    method __eq__ (line 321) | def __eq__(self, other):
    method __hash__ (line 324) | def __hash__(self):
    method __repr__ (line 327) | def __repr__(self) -> str:
    method _get_str (line 330) | def _get_str(self, idx: int = 0, indent: int = 0) -> str:
    method __str__ (line 340) | def __str__(self):
    method __getitem__ (line 343) | def __getitem__(self, slice):
    method __iter__ (line 347) | def __iter__(self):
    method __len__ (line 352) | def __len__(self):
    method _compute_next_unique_logical_op_id_map (line 355) | def _compute_next_unique_logical_op_id_map(self, next_map: dict[str, s...
    method get_next_unique_logical_op_id (line 389) | def get_next_unique_logical_op_id(self, unique_logical_op_id: str) -> ...
    method _compute_root_dataset_ids_map (line 393) | def _compute_root_dataset_ids_map(self, root_dataset_ids_map: dict[str...
    method get_root_dataset_ids (line 421) | def get_root_dataset_ids(self, unique_logical_op_id: str) -> list[str]:
    method _compute_source_unique_logical_op_ids_map (line 425) | def _compute_source_unique_logical_op_ids_map(self, source_map: dict[s...
    method get_source_unique_logical_op_ids (line 449) | def get_source_unique_logical_op_ids(self, unique_logical_op_id: str) ...

FILE: src/palimpzest/query/optimizer/primitives.py
  class Expression (line 12) | class Expression:
    method __init__ (line 19) | def __init__(
    method __eq__ (line 49) | def __eq__(self, other):
    method __str__ (line 52) | def __str__(self):
    method __hash__ (line 61) | def __hash__(self):
    method _compute_expr_id (line 67) | def _compute_expr_id(self) -> int:
    method add_applied_rule (line 70) | def add_applied_rule(self, rule: type[rules.Rule]):
    method set_group_id (line 73) | def set_group_id(self, group_id: int) -> None:
  class LogicalExpression (line 77) | class LogicalExpression(Expression):
  class PhysicalExpression (line 81) | class PhysicalExpression(Expression):
    method from_op_and_logical_expr (line 84) | def from_op_and_logical_expr(cls, op: PhysicalOperator, logical_expres...
  class Group (line 96) | class Group:
    method __init__ (line 103) | def __init__(self, logical_expressions: list[LogicalExpression], field...
    method set_explored (line 119) | def set_explored(self):
    method _compute_group_id (line 122) | def _compute_group_id(self) -> int:

FILE: src/palimpzest/query/optimizer/rules.py
  class Rule (line 62) | class Rule:
    method get_rule_id (line 68) | def get_rule_id(cls):
    method matches_pattern (line 72) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 76) | def substitute(cls, logical_expression: LogicalExpression, **kwargs: d...
  class TransformationRule (line 80) | class TransformationRule(Rule):
    method is_exploration_rule (line 88) | def is_exploration_rule(cls) -> bool:
    method substitute (line 93) | def substitute(
  class ReorderConverts (line 108) | class ReorderConverts(TransformationRule):
    method is_exploration_rule (line 114) | def is_exploration_rule(cls) -> bool:
    method matches_pattern (line 118) | def matches_pattern(cls, logical_expression: Expression) -> bool:
    method substitute (line 124) | def substitute(
  class PushDownFilter (line 245) | class PushDownFilter(TransformationRule):
    method matches_pattern (line 252) | def matches_pattern(cls, logical_expression: Expression) -> bool:
    method substitute (line 258) | def substitute(
  class ImplementationRule (line 375) | class ImplementationRule(Rule):
    method _get_image_fields (line 381) | def _get_image_fields(cls, logical_expression: LogicalExpression) -> s...
    method _get_list_image_fields (line 390) | def _get_list_image_fields(cls, logical_expression: LogicalExpression)...
    method _get_audio_fields (line 399) | def _get_audio_fields(cls, logical_expression: LogicalExpression) -> s...
    method _get_list_audio_fields (line 408) | def _get_list_audio_fields(cls, logical_expression: LogicalExpression)...
    method _is_image_only_operation (line 417) | def _is_image_only_operation(cls, logical_expression: LogicalExpressio...
    method _is_image_operation (line 426) | def _is_image_operation(cls, logical_expression: LogicalExpression) ->...
    method _is_audio_only_operation (line 435) | def _is_audio_only_operation(cls, logical_expression: LogicalExpressio...
    method _is_audio_operation (line 444) | def _is_audio_operation(cls, logical_expression: LogicalExpression) ->...
    method _is_text_only_operation (line 453) | def _is_text_only_operation(cls, logical_expression: LogicalExpression...
    method _is_text_operation (line 462) | def _is_text_operation(cls, logical_expression: LogicalExpression) -> ...
    method _is_text_image_multimodal_operation (line 472) | def _is_text_image_multimodal_operation(cls, logical_expression: Logic...
    method _is_text_audio_multimodal_operation (line 477) | def _is_text_audio_multimodal_operation(cls, logical_expression: Logic...
    method _model_matches_input (line 482) | def _model_matches_input(cls, model: Model, logical_expression: Logica...
    method _embedding_model_matches_input (line 525) | def _embedding_model_matches_input(cls, model: Model, logical_expressi...
    method _get_fixed_op_kwargs (line 534) | def _get_fixed_op_kwargs(cls, logical_expression: LogicalExpression, r...
    method _perform_substitution (line 553) | def _perform_substitution(
  class NonLLMConvertRule (line 606) | class NonLLMConvertRule(ImplementationRule):
    method matches_pattern (line 612) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 618) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class LLMConvertBondedRule (line 623) | class LLMConvertBondedRule(ImplementationRule):
    method matches_pattern (line 629) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 635) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class RAGRule (line 655) | class RAGRule(ImplementationRule):
    method matches_pattern (line 664) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 672) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class MixtureOfAgentsRule (line 711) | class MixtureOfAgentsRule(ImplementationRule):
    method matches_pattern (line 720) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 728) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class CritiqueAndRefineRule (line 752) | class CritiqueAndRefineRule(ImplementationRule):
    method matches_pattern (line 758) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 766) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class SplitRule (line 801) | class SplitRule(ImplementationRule):
    method matches_pattern (line 809) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 817) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class TopKRule (line 839) | class TopKRule(ImplementationRule):
    method matches_pattern (line 846) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 852) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class NonLLMFilterRule (line 861) | class NonLLMFilterRule(ImplementationRule):
    method matches_pattern (line 867) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 874) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class LLMFilterRule (line 879) | class LLMFilterRule(ImplementationRule):
    method matches_pattern (line 885) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 892) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class RelationalJoinRule (line 912) | class RelationalJoinRule(ImplementationRule):
    method matches_pattern (line 918) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 924) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class NestedLoopsJoinRule (line 929) | class NestedLoopsJoinRule(ImplementationRule):
    method matches_pattern (line 935) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 941) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class EmbeddingJoinRule (line 963) | class EmbeddingJoinRule(ImplementationRule):
    method matches_pattern (line 969) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 975) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class SemanticAggregateRule (line 1001) | class SemanticAggregateRule(ImplementationRule):
    method matches_pattern (line 1007) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1013) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class AggregateRule (line 1033) | class AggregateRule(ImplementationRule):
    method matches_pattern (line 1039) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1045) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class AddContextsBeforeComputeRule (line 1067) | class AddContextsBeforeComputeRule(ImplementationRule):
    method matches_pattern (line 1077) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1083) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class BasicSubstitutionRule (line 1111) | class BasicSubstitutionRule(ImplementationRule):
    method matches_pattern (line 1129) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1136) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...

FILE: src/palimpzest/query/optimizer/tasks.py
  class Task (line 17) | class Task:
    method perform (line 24) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ...
  class OptimizeGroup (line 33) | class OptimizeGroup(Task):
    method __init__ (line 44) | def __init__(self, group_id: int):
    method perform (line 47) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ...
  class ExploreGroup (line 79) | class ExploreGroup(Task):
    method __init__ (line 84) | def __init__(self, group_id: int):
    method perform (line 87) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ...
  class OptimizeLogicalExpression (line 119) | class OptimizeLogicalExpression(Task):
    method __init__ (line 127) | def __init__(self, logical_expression: Expression, exploring: bool = F...
    method perform (line 131) | def perform(
  class ApplyRule (line 168) | class ApplyRule(Task):
    method __init__ (line 188) | def __init__(self, rule: type[Rule], logical_expression: Expression, e...
    method perform (line 193) | def perform(
  class OptimizePhysicalExpression (line 277) | class OptimizePhysicalExpression(Task):
    method __init__ (line 287) | def __init__(self, physical_expression: Expression, exploring: bool = ...
    method update_best_physical_expression (line 291) | def update_best_physical_expression(self, group: Group, policy: Policy...
    method _is_dominated (line 324) | def _is_dominated(self, plan_cost: PlanCost, other_plan_cost: PlanCost...
    method _is_pareto_optimal (line 363) | def _is_pareto_optimal(self, expr_plan_cost: PlanCost, pareto_optimal_...
    method update_pareto_optimal_physical_expressions (line 376) | def update_pareto_optimal_physical_expressions(self, group: Group, pol...
    method perform (line 423) | def perform(

FILE: src/palimpzest/query/processor/config.py
  class QueryProcessorConfig (line 10) | class QueryProcessorConfig(BaseModel):
    method to_dict (line 57) | def to_dict(self) -> dict:
    method copy (line 61) | def copy(self) -> QueryProcessorConfig:

FILE: src/palimpzest/query/processor/query_processor.py
  class QueryProcessor (line 18) | class QueryProcessor:
    method __init__ (line 25) | def __init__(
    method execution_id (line 69) | def execution_id(self) -> str:
    method _create_sentinel_plan (line 80) | def _create_sentinel_plan(self, train_dataset: dict[str, Dataset] | No...
    method _execute_best_plan (line 100) | def _execute_best_plan(self, dataset: Dataset, optimizer: Optimizer) -...
    method execute (line 111) | def execute(self) -> DataRecordCollection:

FILE: src/palimpzest/query/processor/query_processor_factory.py
  class QueryProcessorFactory (line 23) | class QueryProcessorFactory:
    method _convert_to_enum (line 26) | def _convert_to_enum(cls, enum_type: type[Enum], value: str) -> Enum:
    method _normalize_strategies (line 34) | def _normalize_strategies(cls, config: QueryProcessorConfig):
    method _normalize_models (line 60) | def _normalize_models(cls, config: QueryProcessorConfig) -> QueryProce...
    method _config_validation_and_normalization (line 109) | def _config_validation_and_normalization(cls, config: QueryProcessorCo...
    method _create_optimizer (line 164) | def _create_optimizer(cls, config: QueryProcessorConfig) -> Optimizer:
    method _create_execution_strategy (line 168) | def _create_execution_strategy(cls, dataset: Dataset, config: QueryPro...
    method _create_sentinel_execution_strategy (line 187) | def _create_sentinel_execution_strategy(cls, config: QueryProcessorCon...
    method create_processor (line 198) | def create_processor(
    method create_and_run_processor (line 238) | def create_and_run_processor(

FILE: src/palimpzest/schemabuilder/schema_builder.py
  class SchemaBuilder (line 21) | class SchemaBuilder:
    method from_file (line 24) | def from_file(cls,
    method from_csv (line 98) | def from_csv(
    method from_jsonld (line 132) | def from_jsonld(
    method from_json (line 176) | def from_json(
    method from_yml (line 201) | def from_yml(

FILE: src/palimpzest/tools/allenpdf.py
  function process_papermage_pdf (line 34) | def process_papermage_pdf(pdf_bytes_docs: list[bytes]):
  function main (line 59) | def main():

FILE: src/palimpzest/tools/pdfparser.py
  function get_md5 (line 17) | def get_md5(file_bytes: bytes) -> str:
  function cosmos_parquet_to_json (line 26) | def cosmos_parquet_to_json(path):
  function cosmos_json_txt (line 99) | def cosmos_json_txt(cosmos_json):
  function cosmos_client (line 111) | def cosmos_client(name: str, data: BinaryIO, output_dir: str, delay=10):
  function get_text_from_pdf (line 191) | def get_text_from_pdf(filename, pdf_bytes, pdfprocessor="pypdf", enable_...

FILE: src/palimpzest/tools/skema_tools.py
  function equations_to_latex (line 10) | def equations_to_latex(image_content):
  function equations_to_latex_base64 (line 19) | def equations_to_latex_base64(image_content):

FILE: src/palimpzest/utils/env_helpers.py
  function load_env (line 5) | def load_env():

FILE: src/palimpzest/utils/hash_helpers.py
  function hash_for_id (line 7) | def hash_for_id(id_str: str, max_chars: int = MAX_ID_CHARS) -> str:
  function hash_for_serialized_dict (line 11) | def hash_for_serialized_dict(dict_obj: dict) -> str:

FILE: src/palimpzest/utils/model_helpers.py
  function get_models (line 8) | def get_models(include_embedding: bool = False, use_vertex: bool = False...
  function get_optimal_models (line 78) | def get_optimal_models(policy: Policy, include_embedding: bool = False, ...
  function use_reasoning_prompt (line 194) | def use_reasoning_prompt(reasoning_effort: str) -> bool:
  function resolve_reasoning_effort (line 202) | def resolve_reasoning_effort(model: Model, reasoning_effort: str) -> str...

FILE: src/palimpzest/utils/model_info_helpers.py
  function _normalize_model_name (line 192) | def _normalize_model_name(name: str) -> str:
  function _extract_version_info (line 197) | def _extract_version_info(name: str) -> tuple[str, str | None, str | None]:
  function fuzzy_match_score (line 233) | def fuzzy_match_score(model_id: str, scores_dict: dict[str, float]) -> f...
  function _extract_model_size (line 297) | def _extract_model_size(model_id: str) -> str | None:
  function derive_model_flags (line 310) | def derive_model_flags(model_id: str) -> dict[str, bool]:
  function _estimate_tps_from_size (line 373) | def _estimate_tps_from_size(model_id: str) -> float | None:
  function predict_local_model_metrics (line 403) | def predict_local_model_metrics(model_id: str) -> dict[str, Any]:
  class ModelMetricsManager (line 458) | class ModelMetricsManager:
    method __new__ (line 464) | def __new__(cls, *args, **kwargs):
    method __init__ (line 469) | def __init__(self):
    method _load_data (line 476) | def _load_data(self):
    method get_model_metrics (line 485) | def get_model_metrics(self, model_name) -> dict[str, Any]:
    method refresh_data (line 489) | def refresh_data(self) -> None:

FILE: src/palimpzest/utils/progress.py
  class ProgressStats (line 32) | class ProgressStats:
  function get_memory_usage (line 42) | def get_memory_usage() -> float:
  class ProgressManager (line 53) | class ProgressManager(ABC):
    method __init__ (line 56) | def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int...
    method get_task_total (line 117) | def get_task_total(self, unique_full_op_id: str) -> int:
    method get_task_description (line 122) | def get_task_description(self, unique_full_op_id: str) -> str:
    method add_task (line 128) | def add_task(self, unique_full_op_id: str, op_str: str, total: int):
    method start (line 133) | def start(self):
    method incr (line 138) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output...
    method finish (line 152) | def finish(self):
  class MockProgressManager (line 157) | class MockProgressManager(ProgressManager):
    method __init__ (line 160) | def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int...
    method add_task (line 163) | def add_task(self, unique_full_op_id: str, op_str: str, total: int):
    method start (line 166) | def start(self):
    method incr (line 169) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output...
    method finish (line 172) | def finish(self):
    method incr_overall_progress_cost (line 175) | def incr_overall_progress_cost(self, cost_delta: float):
  class PZProgressManager (line 178) | class PZProgressManager(ProgressManager):
    method __init__ (line 181) | def __init__(self, plan: PhysicalPlan, num_samples: int | None = None):
    method add_task (line 185) | def add_task(self, unique_full_op_id: str, op_str: str, total: int):
    method start (line 203) | def start(self):
    method incr (line 213) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output...
    method finish (line 265) | def finish(self):
    method update_stats (line 278) | def update_stats(self, unique_full_op_id: str, **kwargs):
  class PZSentinelProgressManager (line 288) | class PZSentinelProgressManager(ProgressManager):
    method __init__ (line 289) | def __init__(self, plan: SentinelPlan, sample_budget: int | None, samp...
    method _is_llm_op (line 358) | def _is_llm_op(self, physical_op: PhysicalOperator) -> bool:
    method get_task_description (line 365) | def get_task_description(self, unique_logical_op_id: str) -> str:
    method add_task (line 370) | def add_task(self, unique_logical_op_id: str, op_str: str, total: int):
    method start (line 388) | def start(self):
    method incr_overall_progress_cost (line 398) | def incr_overall_progress_cost(self, cost_delta: float):
    method incr (line 411) | def incr(self, unique_logical_op_id: str, num_samples: int, display_te...
    method finish (line 455) | def finish(self):
    method update_stats (line 468) | def update_stats(self, unique_logical_op_id: str, **kwargs):
  function create_progress_manager (line 478) | def create_progress_manager(

FILE: src/palimpzest/utils/udfs.py
  function url_to_file (line 14) | def url_to_file(candidate: dict):
  function file_to_xls (line 28) | def file_to_xls(candidate: dict):
  function xls_to_tables (line 34) | def xls_to_tables(candidate: dict):

FILE: src/palimpzest/validator/validator.py
  class Validator (line 25) | class Validator:
    method __init__ (line 33) | def __init__(self, model: Model = Model.o4_MINI):
    method map_score_fn (line 38) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method flat_map_score_fn (line 41) | def flat_map_score_fn(self, fields: list[str], input_record: dict, out...
    method filter_score_fn (line 44) | def filter_score_fn(self, filter_str: str, input_record: dict, output:...
    method join_score_fn (line 47) | def join_score_fn(self, condition: str, left_input_record: dict, right...
    method topk_score_fn (line 50) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
    method _get_gen_stats_from_completion (line 53) | def _get_gen_stats_from_completion(self, completion, start_time: float...
    method _default_map_score_fn (line 75) | def _default_map_score_fn(self, op: LLMConvert, fields: list[str], inp...
    method _default_flat_map_score_fn (line 111) | def _default_flat_map_score_fn(self, op: LLMConvert, fields: list[str]...
    method _default_filter_score_fn (line 150) | def _default_filter_score_fn(self, op: LLMFilter, filter_str: str, inp...
    method _default_join_score_fn (line 190) | def _default_join_score_fn(self, op: JoinOp, condition: str, left_inpu...
    method _default_topk_score_fn (line 227) | def _default_topk_score_fn(self, op: TopKOp, fields: list[str], input_...
    method _score_map (line 267) | def _score_map(self, op: LLMConvert, fields: list[str], input_record: ...
    method _score_flat_map (line 276) | def _score_flat_map(self, op: LLMConvert, fields: list[str], input_rec...
    method _score_filter (line 285) | def _score_filter(self, op: LLMFilter, filter_str: str, input_record: ...
    method _score_join (line 294) | def _score_join(self, op: JoinOp, condition: str, left_input_record: D...
    method _score_topk (line 303) | def _score_topk(self, op: TopKOp, fields: list[str], input_record: Dat...

FILE: tests/pytest/conftest.py
  function dataset (line 26) | def dataset(request, enron_eval_tiny, real_estate_eval_tiny):
  function workload (line 36) | def workload(
  function policy (line 56) | def policy(request):
  function physical_plan (line 68) | def physical_plan(
  function sentinel_plan (line 92) | def sentinel_plan(
  function execution_data (line 105) | def execution_data(
  function expected_records (line 120) | def expected_records(
  function champion_outputs (line 146) | def champion_outputs(
  function expected_qualities (line 164) | def expected_qualities(
  function side_effect (line 184) | def side_effect(
  function operator_to_stats (line 203) | def operator_to_stats(
  function expected_plan (line 232) | def expected_plan(

FILE: tests/pytest/fixtures/champion_outputs.py
  function scan_convert_filter_champion_outputs (line 10) | def scan_convert_filter_champion_outputs(scan_convert_filter_sentinel_pl...
  function scan_convert_filter_empty_champion_outputs (line 47) | def scan_convert_filter_empty_champion_outputs(scan_convert_filter_senti...
  function scan_convert_filter_varied_champion_outputs (line 84) | def scan_convert_filter_varied_champion_outputs(scan_convert_filter_sent...
  function scan_multi_convert_multi_filter_champion_outputs (line 121) | def scan_multi_convert_multi_filter_champion_outputs(scan_multi_convert_...

FILE: tests/pytest/fixtures/datasets.py
  class RealEstateListingDataset (line 16) | class RealEstateListingDataset(IterDataset):
    method __init__ (line 17) | def __init__(self, listings_dir):
    method __len__ (line 28) | def __len__(self):
    method __getitem__ (line 31) | def __getitem__(self, idx: int):
  class CostModelTestDataset (line 49) | class CostModelTestDataset(IterDataset):
    method __init__ (line 50) | def __init__(self):
    method __len__ (line 54) | def __len__(self):
    method __getitem__ (line 57) | def __getitem__(self, idx: int):
  function project_root (line 66) | def project_root() -> Path:
  function enron_eval_tiny_data_path (line 71) | def enron_eval_tiny_data_path(project_root) -> str:
  function real_estate_eval_tiny_data_path (line 76) | def real_estate_eval_tiny_data_path(project_root) -> str:
  function enron_eval_tiny (line 82) | def enron_eval_tiny(enron_eval_tiny_data_path):
  function real_estate_eval_tiny (line 87) | def real_estate_eval_tiny(real_estate_eval_tiny_data_path):
  function cost_model_test_dataset (line 92) | def cost_model_test_dataset():

FILE: tests/pytest/fixtures/execution_data.py
  function scan_convert_filter_execution_data (line 11) | def scan_convert_filter_execution_data(scan_convert_filter_sentinel_plan...
  function scan_convert_filter_varied_execution_data (line 114) | def scan_convert_filter_varied_execution_data(scan_convert_filter_sentin...
  function scan_multi_convert_multi_filter_execution_data (line 225) | def scan_multi_convert_multi_filter_execution_data(scan_multi_convert_mu...

FILE: tests/pytest/fixtures/expected_physical_plans.py
  function get_three_converts_plan (line 17) | def get_three_converts_plan(three_converts_workload, enron_eval_tiny, em...
  function three_converts_min_cost_expected_plan (line 52) | def three_converts_min_cost_expected_plan(three_converts_workload, enron...
  function three_converts_max_quality_expected_plan (line 71) | def three_converts_max_quality_expected_plan(three_converts_workload, en...
  function three_converts_min_cost_at_fixed_quality_expected_plan (line 90) | def three_converts_min_cost_at_fixed_quality_expected_plan(three_convert...
  function three_converts_max_quality_at_fixed_cost_expected_plan (line 109) | def three_converts_max_quality_at_fixed_cost_expected_plan(three_convert...
  function get_one_filter_one_convert_plan (line 128) | def get_one_filter_one_convert_plan(one_filter_one_convert_workload, enr...
  function one_filter_one_convert_min_cost_expected_plan (line 163) | def one_filter_one_convert_min_cost_expected_plan(one_filter_one_convert...
  function get_two_converts_two_filters_plan (line 186) | def get_two_converts_two_filters_plan(two_converts_two_filters_workload,...
  function two_converts_two_filters_min_cost_expected_plan (line 228) | def two_converts_two_filters_min_cost_expected_plan(two_converts_two_fil...
  function two_converts_two_filters_max_quality_expected_plan (line 257) | def two_converts_two_filters_max_quality_expected_plan(two_converts_two_...
  function two_converts_two_filters_min_cost_at_fixed_quality_expected_plan (line 286) | def two_converts_two_filters_min_cost_at_fixed_quality_expected_plan(two...
  function two_converts_two_filters_max_quality_at_fixed_cost_expected_plan (line 315) | def two_converts_two_filters_max_quality_at_fixed_cost_expected_plan(two...

FILE: tests/pytest/fixtures/expected_qualities.py
  function scan_convert_filter_qualities (line 8) | def scan_convert_filter_qualities(scan_convert_filter_execution_data):
  function scan_convert_filter_empty_qualities (line 19) | def scan_convert_filter_empty_qualities(scan_convert_filter_execution_da...
  function scan_convert_filter_varied_qualities (line 44) | def scan_convert_filter_varied_qualities(scan_convert_filter_varied_exec...
  function scan_convert_filter_varied_override_qualities (line 76) | def scan_convert_filter_varied_override_qualities(scan_convert_filter_va...
  function scan_multi_convert_multi_filter_qualities (line 135) | def scan_multi_convert_multi_filter_qualities(scan_multi_convert_multi_f...

FILE: tests/pytest/fixtures/expected_records.py
  function enron_all_expected_records (line 12) | def enron_all_expected_records(enron_eval_tiny_data_path):
  function enron_filter_expected_records (line 25) | def enron_filter_expected_records(enron_all_expected_records):
  function real_estate_all_expected_records (line 35) | def real_estate_all_expected_records(real_estate_eval_tiny_data_path, im...
  function real_estate_one_to_many_expected_records (line 58) | def real_estate_one_to_many_expected_records(real_estate_eval_tiny_data_...
  function scan_convert_filter_expected_outputs (line 84) | def scan_convert_filter_expected_outputs(foobar_schema):
  function scan_convert_filter_empty_expected_outputs (line 102) | def scan_convert_filter_empty_expected_outputs():
  function scan_convert_filter_varied_expected_outputs (line 106) | def scan_convert_filter_varied_expected_outputs(foobar_schema):
  function scan_multi_convert_multi_filter_expected_outputs (line 127) | def scan_multi_convert_multi_filter_expected_outputs(foobar_schema, baz_...

FILE: tests/pytest/fixtures/models.py
  function embedding_text_only_model (line 9) | def embedding_text_only_model():

FILE: tests/pytest/fixtures/operator_to_stats.py
  function get_three_converts_logical_and_full_op_ids (line 15) | def get_three_converts_logical_and_full_op_ids(three_converts_workload, ...
  function three_converts_min_cost_operator_to_stats (line 71) | def three_converts_min_cost_operator_to_stats(three_converts_workload, e...
  function three_converts_max_quality_operator_to_stats (line 100) | def three_converts_max_quality_operator_to_stats(three_converts_workload...
  function three_converts_min_cost_at_fixed_quality_operator_to_stats (line 129) | def three_converts_min_cost_at_fixed_quality_operator_to_stats(three_con...
  function three_converts_max_quality_at_fixed_cost_operator_to_stats (line 158) | def three_converts_max_quality_at_fixed_cost_operator_to_stats(three_con...
  function get_one_filter_one_convert_logical_and_full_op_ids (line 190) | def get_one_filter_one_convert_logical_and_full_op_ids(one_filter_one_co...
  function one_filter_one_convert_min_cost_operator_to_stats (line 240) | def one_filter_one_convert_min_cost_operator_to_stats(one_filter_one_con...
  function get_two_converts_two_filters_logical_and_full_op_ids (line 264) | def get_two_converts_two_filters_logical_and_full_op_ids(two_converts_tw...
  function two_converts_two_filters_min_cost_operator_to_stats (line 332) | def two_converts_two_filters_min_cost_operator_to_stats(two_converts_two...
  function two_converts_two_filters_max_quality_operator_to_stats (line 366) | def two_converts_two_filters_max_quality_operator_to_stats(two_converts_...
  function two_converts_two_filters_min_cost_at_fixed_quality_operator_to_stats (line 400) | def two_converts_two_filters_min_cost_at_fixed_quality_operator_to_stats...
  function two_converts_two_filters_max_quality_at_fixed_cost_operator_to_stats (line 434) | def two_converts_two_filters_max_quality_at_fixed_cost_operator_to_stats...

FILE: tests/pytest/fixtures/physical_plans.py
  function scan_only_plan (line 16) | def scan_only_plan(enron_eval_tiny):
  function non_llm_filter_plan (line 23) | def non_llm_filter_plan(enron_eval_tiny):
  function llm_filter_plan (line 36) | def llm_filter_plan(enron_eval_tiny):
  function bonded_llm_convert_plan (line 51) | def bonded_llm_convert_plan(email_schema, enron_eval_tiny):
  function rag_convert_plan (line 64) | def rag_convert_plan(email_schema, enron_eval_tiny, embedding_text_only_...
  function image_convert_plan (line 80) | def image_convert_plan(real_estate_listing_files_schema, image_real_esta...
  function one_to_many_convert_plan (line 93) | def one_to_many_convert_plan(real_estate_listing_files_schema, room_real...
  function scan_convert_filter_sentinel_plan (line 107) | def scan_convert_filter_sentinel_plan(foobar_schema):
  function scan_multi_convert_multi_filter_sentinel_plan (line 134) | def scan_multi_convert_multi_filter_sentinel_plan(foobar_schema, baz_sch...

FILE: tests/pytest/fixtures/schemas.py
  function email_schema (line 11) | def email_schema():
  function real_estate_listing_files_schema (line 21) | def real_estate_listing_files_schema():
  function text_real_estate_listing_schema (line 32) | def text_real_estate_listing_schema(real_estate_listing_files_schema):
  function image_real_estate_listing_schema (line 42) | def image_real_estate_listing_schema(real_estate_listing_files_schema):
  function room_real_estate_listing_schema (line 57) | def room_real_estate_listing_schema(real_estate_listing_files_schema):
  function case_data_schema (line 69) | def case_data_schema():
  function foobar_schema (line 100) | def foobar_schema():
  function baz_schema (line 108) | def baz_schema():

FILE: tests/pytest/fixtures/side_effects.py
  function enron_filter (line 8) | def enron_filter():
  function enron_convert (line 20) | def enron_convert(email_schema):
  function real_estate_convert (line 52) | def real_estate_convert(image_real_estate_listing_schema):
  function real_estate_one_to_many_convert (line 70) | def real_estate_one_to_many_convert(room_real_estate_listing_schema):

FILE: tests/pytest/fixtures/workloads.py
  function within_two_miles_of_mit (line 5) | def within_two_miles_of_mit(record):
  function in_price_range (line 26) | def in_price_range(record):
  function enron_workload (line 39) | def enron_workload(enron_eval_tiny, email_schema):
  function small_real_estate_workload (line 52) | def small_real_estate_workload(
  function real_estate_workload (line 68) | def real_estate_workload(
  function three_converts_workload (line 92) | def three_converts_workload(enron_eval_tiny, email_schema, foobar_schema...
  function one_filter_one_convert_workload (line 102) | def one_filter_one_convert_workload(enron_eval_tiny, email_schema):
  function two_converts_two_filters_workload (line 111) | def two_converts_two_filters_workload(enron_eval_tiny, email_schema, foo...

FILE: tests/pytest/test_aggregate.py
  class TextInputSchema (line 21) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 25) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 29) | class AudioInputSchema(BaseModel):
  class OutputSchema (line 38) | class OutputSchema(BaseModel):
  function create_input_record (line 41) | def create_input_record(input_schema: type[BaseModel], idx: int) -> Data...
  function mock_generator_call (line 59) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function test_aggregate (line 77) | def test_aggregate(mocker, input_schema, physical_op_class):

FILE: tests/pytest/test_convert.py
  function test_convert (line 27) | def test_convert(mocker, convert_op, side_effect, email_schema, enron_ev...

FILE: tests/pytest/test_dataset.py
  function sample_df (line 11) | def sample_df():
  function test_dataset_initialization (line 19) | def test_dataset_initialization(sample_df):
  function test_dataset_filter (line 25) | def test_dataset_filter(sample_df):
  function test_dataset_add_columns (line 39) | def test_dataset_add_columns(sample_df):

FILE: tests/pytest/test_distinct.py
  function sample_df (line 20) | def sample_df():
  function test_distinct (line 29) | def test_distinct(sample_df, execution_strategy):
  function test_dataset_with_distinct_cols (line 39) | def test_dataset_with_distinct_cols(sample_df, execution_strategy):
  function test_dataset_with_distinct_cols_and_limit (line 49) | def test_dataset_with_distinct_cols_and_limit(sample_df, execution_strat...
  function test_dataset_with_distinct_cols_and_filter (line 59) | def test_dataset_with_distinct_cols_and_filter(sample_df, execution_stra...

FILE: tests/pytest/test_dynamic_models.py
  function input_schema (line 48) | def input_schema():
  function output_schema (line 56) | def output_schema():
  function sample_record (line 64) | def sample_record(input_schema):
  function mock_litellm_response (line 70) | def mock_litellm_response():
  class TestModelInstantiation (line 85) | class TestModelInstantiation:
    method test_known_model_instantiation (line 88) | def test_known_model_instantiation(self):
    method test_model_instantiation_with_string (line 94) | def test_model_instantiation_with_string(self):
    method test_unknown_model_raises_error (line 101) | def test_unknown_model_raises_error(self):
    method test_model_properties_from_specs (line 106) | def test_model_properties_from_specs(self):
    method test_model_provider_property (line 115) | def test_model_provider_property(self):
    method test_model_api_base_parameter (line 123) | def test_model_api_base_parameter(self):
  class TestModelRegistry (line 135) | class TestModelRegistry:
    method test_models_registered_on_creation (line 138) | def test_models_registered_on_creation(self):
    method test_get_all_models_returns_list (line 148) | def test_get_all_models_returns_list(self):
    method test_registry_contains_expected_models (line 154) | def test_registry_contains_expected_models(self):
  class TestModelEqualityAndHashing (line 172) | class TestModelEqualityAndHashing:
    method test_model_equality_same_instance (line 175) | def test_model_equality_same_instance(self):
    method test_model_equality_same_value (line 180) | def test_model_equality_same_value(self):
    method test_model_equality_with_string (line 186) | def test_model_equality_with_string(self):
    method test_model_inequality (line 191) | def test_model_inequality(self):
    method test_model_hash_consistency (line 195) | def test_model_hash_consistency(self):
    method test_model_usable_in_set (line 201) | def test_model_usable_in_set(self):
    method test_model_usable_as_dict_key (line 206) | def test_model_usable_as_dict_key(self):
    method test_model_str_repr (line 211) | def test_model_str_repr(self):
    method test_model_lt_comparison (line 217) | def test_model_lt_comparison(self):
  class TestModelHelperFunctions (line 229) | class TestModelHelperFunctions:
    method test_get_models_with_openai_key (line 232) | def test_get_models_with_openai_key(self):
    method test_get_models_excludes_embedding_by_default (line 239) | def test_get_models_excludes_embedding_by_default(self):
    method test_get_models_includes_embedding_when_requested (line 246) | def test_get_models_includes_embedding_when_requested(self):
    method test_get_models_empty_without_keys (line 253) | def test_get_models_empty_without_keys(self):
    method test_get_optimal_models_returns_top_models (line 264) | def test_get_optimal_models_returns_top_models(self):
    method test_get_optimal_models_respects_policy (line 270) | def test_get_optimal_models_respects_policy(self):
    method test_get_optimal_models_never_returns_empty_with_available_models (line 283) | def test_get_optimal_models_never_returns_empty_with_available_models(...
    method test_get_optimal_models_fallback_returns_best_by_primary_metric (line 293) | def test_get_optimal_models_fallback_returns_best_by_primary_metric(se...
    method test_get_optimal_models_fallback_with_time_policy (line 308) | def test_get_optimal_models_fallback_with_time_policy(self):
  class TestGeneratorIntegration (line 323) | class TestGeneratorIntegration:
    method test_generator_uses_model_value (line 327) | def test_generator_uses_model_value(
    method test_generator_with_different_providers (line 354) | def test_generator_with_different_providers(
  class TestQueryProcessorIntegration (line 384) | class TestQueryProcessorIntegration:
    method test_factory_accepts_model_list (line 388) | def test_factory_accepts_model_list(self, mock_processor_cls):
    method test_factory_auto_selects_models_when_none_provided (line 409) | def test_factory_auto_selects_models_when_none_provided(self):
  class TestEndToEndIntegration (line 439) | class TestEndToEndIntegration:
    method test_simple_sem_map_pipeline (line 446) | def test_simple_sem_map_pipeline(self):
    method test_pipeline_with_filter (line 490) | def test_pipeline_with_filter(self):
    method test_pipeline_with_auto_model_selection (line 526) | def test_pipeline_with_auto_model_selection(self):
  class TestVLLMModelSupport (line 555) | class TestVLLMModelSupport:
    method test_vllm_model_creation_with_api_base (line 560) | def test_vllm_model_creation_with_api_base(self):
    method test_vllm_model_stores_extra_kwargs (line 566) | def test_vllm_model_stores_extra_kwargs(self):
    method test_vllm_model_without_api_base_raises (line 571) | def test_vllm_model_without_api_base_raises(self):
    method test_vllm_model_cost_is_zero (line 578) | def test_vllm_model_cost_is_zero(self):
    method test_predict_local_model_metrics_known_model (line 590) | def test_predict_local_model_metrics_known_model(self):
    method test_predict_local_model_metrics_unknown_model (line 596) | def test_predict_local_model_metrics_unknown_model(self):
    method test_vllm_model_has_quality_score (line 602) | def test_vllm_model_has_quality_score(self):
    method test_vllm_model_has_latency (line 608) | def test_vllm_model_has_latency(self):
    method test_vllm_model_unknown_gets_defaults (line 614) | def test_vllm_model_unknown_gets_defaults(self):
    method test_fuzzy_match_exact_substring (line 622) | def test_fuzzy_match_exact_substring(self):
    method test_fuzzy_match_normalized (line 627) | def test_fuzzy_match_normalized(self):
    method test_fuzzy_match_no_match_returns_none (line 632) | def test_fuzzy_match_no_match_returns_none(self):
    method test_derive_model_flags_llama (line 639) | def test_derive_model_flags_llama(self):
    method test_derive_model_flags_non_llama (line 644) | def test_derive_model_flags_non_llama(self):
    method test_derive_model_flags_clip (line 649) | def test_derive_model_flags_clip(self):
    method test_derive_model_flags_gpt5 (line 654) | def test_derive_model_flags_gpt5(self):
    method test_derive_model_flags_o_model (line 659) | def test_derive_model_flags_o_model(self):
    method test_vllm_model_is_vllm (line 666) | def test_vllm_model_is_vllm(self):
    method test_vllm_llama_model_is_llama (line 671) | def test_vllm_llama_model_is_llama(self):
    method test_vllm_non_llama_is_not_llama (line 676) | def test_vllm_non_llama_is_not_llama(self):
    method test_vllm_model_defaults (line 683) | def test_vllm_model_defaults(self):
    method test_factory_rejects_multiple_vllm_models (line 691) | def test_factory_rejects_multiple_vllm_models(self):
    method test_generator_passes_vllm_kwargs (line 710) | def test_generator_passes_vllm_kwargs(self, mock_completion, sample_re...

FILE: tests/pytest/test_dynamicschema.py
  function test_dynamicschema_jsonld (line 15) | def test_dynamicschema_jsonld(project_root: Path):
  function test_dynamicschema_csv (line 20) | def test_dynamicschema_csv(project_root: Path):
  function test_dynamicschema_json (line 26) | def test_dynamicschema_json(mocker, enron_workload, enron_convert, enron...
  function test_dynamicschema_yml (line 55) | def test_dynamicschema_yml(mocker, enron_workload, enron_convert, enron_...

FILE: tests/pytest/test_execution.py
  class TestExecution (line 19) | class TestExecution:
    method test_execute_full_plan (line 54) | def test_execute_full_plan(self, mocker, execution_strategy, dataset, ...

FILE: tests/pytest/test_filter.py
  class TextInputSchema (line 26) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 30) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 34) | class AudioInputSchema(BaseModel):
  function mock_generator_call (line 43) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function test_filter (line 61) | def test_filter(mocker, input_schema, physical_op_class, embedding_text_...

FILE: tests/pytest/test_generator.py
  function generate_session_id (line 15) | def generate_session_id() -> str:
  function question (line 24) | def question():
  function output_schema (line 31) | def output_schema():
  function test_generator (line 46) | def test_generator(model, question, output_schema):
  function test_vllm_generator (line 55) | def test_vllm_generator(question, output_schema):
  class TextInputSchema (line 154) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 160) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 166) | class AudioInputSchema(BaseModel):
  class AnimalOutputSchema (line 179) | class AnimalOutputSchema(BaseModel):
  function create_input_record (line 468) | def create_input_record(input_schema, modality: str):
  function get_model_for_provider (line 496) | def get_model_for_provider(provider: str) -> Model:
  function get_input_schema_for_modality (line 514) | def get_input_schema_for_modality(modality: str):
  function check_api_key (line 570) | def check_api_key(provider: str) -> bool:
  function is_modality_supported (line 579) | def is_modality_supported(provider: str, modality: str) -> bool:
  function within_tolerance (line 584) | def within_tolerance(actual: int, expected: int, tolerance: float = 0.05...
  function assert_stats_match (line 592) | def assert_stats_match(gen_stats, expected: dict, request_name: str, pro...
  function test_generator_stats (line 675) | def test_generator_stats(provider, modality):

FILE: tests/pytest/test_iter_dataset.py
  function temp_text_file (line 17) | def temp_text_file():
  function temp_text_dir (line 25) | def temp_text_dir():
  function list_values (line 38) | def list_values():
  function df_values (line 42) | def df_values():
  function test_text_dataset (line 46) | def test_text_dataset(temp_text_dir):
  function test_memory_dataset_list (line 58) | def test_memory_dataset_list(list_values):
  function test_memory_dataset_df (line 69) | def test_memory_dataset_df(df_values):
  function test_memory_dataset_copy (line 81) | def test_memory_dataset_copy():
  function temp_html_dir (line 89) | def temp_html_dir(tmp_path):
  function test_html_dataset (line 103) | def test_html_dataset(temp_html_dir):
  function test_invalid_directory (line 113) | def test_invalid_directory():

FILE: tests/pytest/test_join.py
  class TextInputSchema (line 21) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 25) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 29) | class AudioInputSchema(BaseModel):
  function create_input_record (line 38) | def create_input_record(schema: type[BaseModel]) -> DataRecord:
  function mock_generator_call (line 53) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function embedding_join_mock_generator_call (line 60) | def embedding_join_mock_generator_call(candidate, fields, right_candidat...
  function test_join (line 82) | def test_join(mocker, left_input_schema, right_input_schema, physical_op...
  function test_embedding_join (line 134) | def test_embedding_join(mocker, embedding_text_only_model):

FILE: tests/pytest/test_map.py
  class TextInputSchema (line 25) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 29) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 33) | class AudioInputSchema(BaseModel):
  class OutputSchema (line 42) | class OutputSchema(BaseModel):
  function mock_generator_call (line 45) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function test_map (line 63) | def test_map(mocker, input_schema, physical_op_class, embedding_text_onl...

FILE: tests/pytest/test_optimizer.py
  class TestPrimitives (line 22) | class TestPrimitives:
    method test_group_id_equality (line 23) | def test_group_id_equality(self, email_schema):
  class TestOptimizer (line 105) | class TestOptimizer:
    method test_basic_functionality (line 106) | def test_basic_functionality(self, enron_eval_tiny, opt_strategy):
    method test_simple_max_quality_convert (line 123) | def test_simple_max_quality_convert(self, enron_eval_tiny, email_schem...
    method test_simple_min_cost_convert (line 147) | def test_simple_min_cost_convert(self, enron_eval_tiny, email_schema, ...
    method test_simple_min_time_convert (line 166) | def test_simple_min_time_convert(self, enron_eval_tiny, email_schema, ...
    method test_simple_vllm_convert (line 185) | def test_simple_vllm_convert(self, enron_eval_tiny, email_schema, opt_...
    method test_push_down_filter (line 205) | def test_push_down_filter(self, enron_eval_tiny, email_schema, opt_str...
    method test_push_down_two_filters (line 226) | def test_push_down_two_filters(self, enron_eval_tiny, email_schema, op...
    method test_small_real_estate_logical_reorder (line 249) | def test_small_real_estate_logical_reorder(self, small_real_estate_wor...
    method test_real_estate_logical_reorder (line 272) | def test_real_estate_logical_reorder(self, real_estate_workload, opt_s...
    method test_seven_filters (line 297) | def test_seven_filters(self, enron_eval_tiny, email_schema, opt_strate...
  class MockSampleBasedCostModel (line 342) | class MockSampleBasedCostModel:
    method __init__ (line 345) | def __init__(self, operator_to_stats):
    method get_costed_full_op_ids (line 358) | def get_costed_full_op_ids(self):
    method __call__ (line 361) | def __call__(
  class TestParetoOptimizer (line 451) | class TestParetoOptimizer:
    method test_pareto_optimization_strategy (line 452) | def test_pareto_optimization_strategy(self, workload, policy, operator...

FILE: tests/pytest/test_physical.py
  class SimpleSchema (line 15) | class SimpleSchema(BaseModel):
  class SimpleSchemaTwo (line 19) | class SimpleSchemaTwo(BaseModel):
  function test_physical_operator_init (line 24) | def test_physical_operator_init():
  function test_physical_operator_equality (line 41) | def test_physical_operator_equality():
  function test_physical_operator_str (line 52) | def test_physical_operator_str():
  function test_physical_operator_id_generation (line 64) | def test_physical_operator_id_generation():
  function test_physical_operator_copy (line 82) | def test_physical_operator_copy():

FILE: tests/pytest/test_records.py
  class TestSchema (line 11) | class TestSchema(BaseModel):
  class TestDataRecord (line 16) | class TestDataRecord:
    method sample_record (line 18) | def sample_record(self):
    method sample_df (line 24) | def sample_df(self):
    method test_create_record (line 31) | def test_create_record(self, sample_record):
    method test_record_equality (line 37) | def test_record_equality(self, sample_record):
    method test_to_df (line 42) | def test_to_df(self, sample_df):
    method test_to_df_with_project_cols (line 51) | def test_to_df_with_project_cols(self, sample_df):
    method test_invalid_attribute (line 60) | def test_invalid_attribute(self, sample_record):
    method test_to_dict (line 65) | def test_to_dict(self, sample_record):
    method test_to_json_str (line 71) | def test_to_json_str(self, sample_record):

FILE: tests/pytest/test_rules.py
  function schema (line 12) | def schema():
  function base_scan_op (line 19) | def base_scan_op(schema):
  function test_substitute_methods (line 25) | def test_substitute_methods(base_scan_op):

FILE: tests/pytest/test_scan.py
  class List (line 9) | class List(BaseModel):
  function test_marshal_and_scan_memory_source (line 13) | def test_marshal_and_scan_memory_source():

FILE: tests/pytest/test_schemas.py
  class Dog (line 15) | class Dog(BaseModel):
  class Cat (line 19) | class Cat(BaseModel):
  function test_schema_equality (line 23) | def test_schema_equality():
  function test_get_schema_field_names (line 27) | def test_get_schema_field_names():
  function test_project_schema (line 31) | def test_project_schema():
  function test_create_schema_from_fields (line 40) | def test_create_schema_from_fields():
  function test_create_schema_from_df (line 51) | def test_create_schema_from_df():
  function test_union_schemas (line 67) | def test_union_schemas():

FILE: website/src/components/HomepageFeatures/index.tsx
  type FeatureItem (line 6) | type FeatureItem = {
  function Feature (line 46) | function Feature({title, Svg, description}: FeatureItem) {
  function HomepageFeatures (line 60) | function HomepageFeatures(): ReactNode {

FILE: website/src/components/ResearchPage/admonitions.tsx
  type AbstractProps (line 3) | interface AbstractProps {
  function Abstract (line 9) | function Abstract({ children }: AbstractProps) {

FILE: website/src/pages/index.tsx
  function HomepageHeader (line 11) | function HomepageHeader() {
  function Home (line 32) | function Home(): ReactNode {