SYMBOL INDEX (1755 symbols across 130 files) FILE: abacus-research/biodex-ablation.py class BiodexValidator (line 32) | class BiodexValidator(pz.Validator): method __init__ (line 33) | def __init__( method _compute_pmid_to_label (line 55) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict: method rank_precision_at_k (line 68) | def rank_precision_at_k(self, preds: list | None, targets: list): method term_recall (line 93) | def term_recall(self, preds: list | None, targets: list): method map_score_fn (line 123) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method topk_score_fn (line 136) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... class BiodexDataset (line 146) | class BiodexDataset(pz.IterDataset): method __init__ (line 147) | def __init__( method __len__ (line 169) | def __len__(self): method __getitem__ (line 172) | def __getitem__(self, idx: int): function search_func (line 291) | def search_func(index: chromadb.Collection, query: list[list[float]], k:... function compute_target_record (line 393) | def compute_target_record(entry): function rank_precision_at_k (line 405) | def rank_precision_at_k(preds: list, targets: list, k: int): function compute_avg_rp_at_k (line 422) | def compute_avg_rp_at_k(records, k=5): FILE: abacus-research/biodex-demo.py class BiodexValidator (line 32) | class BiodexValidator(pz.Validator): method __init__ (line 33) | def __init__( method _compute_pmid_to_label (line 55) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict: method rank_precision_at_k (line 68) | def rank_precision_at_k(self, preds: list | None, targets: list): method term_recall (line 93) | def term_recall(self, preds: list | None, targets: list): method map_score_fn (line 123) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method topk_score_fn (line 136) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... class BiodexDataset (line 146) | class BiodexDataset(pz.IterDataset): method __init__ (line 147) | def __init__( method __len__ (line 169) | def __len__(self): method __getitem__ (line 172) | def __getitem__(self, idx: int): function search_func (line 332) | def search_func(index: chromadb.Collection, query: list[list[float]], k:... function compute_target_record (line 435) | def compute_target_record(entry): function rank_precision_at_k (line 447) | def rank_precision_at_k(preds: list, targets: list, k: int): function compute_avg_rp_at_k (line 464) | def compute_avg_rp_at_k(records, k=5): FILE: abacus-research/biodex-max-quality-at-cost.py class BiodexValidator (line 34) | class BiodexValidator(pz.Validator): method __init__ (line 35) | def __init__( method _compute_pmid_to_label (line 57) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict: method rank_precision_at_k (line 70) | def rank_precision_at_k(self, preds: list | None, targets: list): method term_recall (line 95) | def term_recall(self, preds: list | None, targets: list): method map_score_fn (line 125) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method topk_score_fn (line 138) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... class BiodexDataset (line 148) | class BiodexDataset(pz.IterDataset): method __init__ (line 149) | def __init__( method __len__ (line 171) | def __len__(self): method __getitem__ (line 174) | def __getitem__(self, idx: int): function search_func (line 321) | def search_func(index: chromadb.Collection, query: list[list[float]], k:... function compute_target_record (line 423) | def compute_target_record(entry): function rank_precision_at_k (line 435) | def rank_precision_at_k(preds: list, targets: list, k: int): function compute_avg_rp_at_k (line 452) | def compute_avg_rp_at_k(records, k=5): FILE: abacus-research/biodex-min-at-fixed-quality.py class BiodexValidator (line 33) | class BiodexValidator(pz.Validator): method __init__ (line 34) | def __init__( method _compute_pmid_to_label (line 56) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict: method rank_precision_at_k (line 69) | def rank_precision_at_k(self, preds: list | None, targets: list): method term_recall (line 94) | def term_recall(self, preds: list | None, targets: list): method map_score_fn (line 124) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method topk_score_fn (line 137) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... class BiodexDataset (line 147) | class BiodexDataset(pz.IterDataset): method __init__ (line 148) | def __init__( method __len__ (line 170) | def __len__(self): method __getitem__ (line 173) | def __getitem__(self, idx: int): function search_func (line 318) | def search_func(index: chromadb.Collection, query: list[list[float]], k:... function compute_target_record (line 415) | def compute_target_record(entry): function rank_precision_at_k (line 427) | def rank_precision_at_k(preds: list, targets: list, k: int): function compute_avg_rp_at_k (line 444) | def compute_avg_rp_at_k(records, k=5): FILE: abacus-research/biodex-pareto-cascades.py class BiodexValidator (line 33) | class BiodexValidator(pz.Validator): method __init__ (line 34) | def __init__( method _compute_pmid_to_label (line 56) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict: method rank_precision_at_k (line 69) | def rank_precision_at_k(self, preds: list | None, targets: list): method term_recall (line 94) | def term_recall(self, preds: list | None, targets: list): method map_score_fn (line 124) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method topk_score_fn (line 137) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... class BiodexDataset (line 147) | class BiodexDataset(pz.IterDataset): method __init__ (line 148) | def __init__( method __len__ (line 170) | def __len__(self): method __getitem__ (line 173) | def __getitem__(self, idx: int): function search_func (line 321) | def search_func(index: chromadb.Collection, query: list[list[float]], k:... function compute_target_record (line 420) | def compute_target_record(entry): function rank_precision_at_k (line 432) | def rank_precision_at_k(preds: list, targets: list, k: int): function compute_avg_rp_at_k (line 449) | def compute_avg_rp_at_k(records, k=5): FILE: abacus-research/cuad-demo.py function get_label_df (line 267) | def get_label_df(num_contracts: int = 1, seed: int=42) -> pd.DataFrame: function get_jaccard (line 325) | def get_jaccard(label, pred): function evaluate_entry (line 347) | def evaluate_entry(labels, preds, substr_ok): function handle_empty_preds (line 401) | def handle_empty_preds(preds): class CUADValidator (line 413) | class CUADValidator(pz.Validator): method __init__ (line 414) | def __init__(self, num_contracts: int = 1, seed: int=42): method map_score_fn (line 425) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method _compute_contract_id_to_labels (line 440) | def _compute_contract_id_to_labels(self): class CUADDataset (line 493) | class CUADDataset(pz.IterDataset): method __init__ (line 494) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:... method _construct_dataset (line 511) | def _construct_dataset(self, dataset, num_contracts, seed: int=42): method __len__ (line 544) | def __len__(self): method __getitem__ (line 547) | def __getitem__(self, idx: int): function compute_precision_recall (line 553) | def compute_precision_recall(label_df, preds_df): function parse_arguments (line 589) | def parse_arguments(): function build_cuad_query (line 665) | def build_cuad_query(dataset, mode): function main (line 691) | def main(): FILE: abacus-research/cuad-max-quality-at-cost.py function get_label_df (line 268) | def get_label_df(num_contracts: int = 1, seed: int=42) -> pd.DataFrame: function get_jaccard (line 325) | def get_jaccard(label, pred): function evaluate_entry (line 347) | def evaluate_entry(labels, preds, substr_ok): function handle_empty_preds (line 401) | def handle_empty_preds(preds): class CUADValidator (line 413) | class CUADValidator(pz.Validator): method __init__ (line 414) | def __init__(self, num_contracts: int = 1, seed: int=42): method map_score_fn (line 425) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method _compute_contract_id_to_labels (line 439) | def _compute_contract_id_to_labels(self): class CUADDataset (line 492) | class CUADDataset(pz.IterDataset): method __init__ (line 493) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:... method _construct_dataset (line 509) | def _construct_dataset(self, dataset, num_contracts, seed: int=42): method __len__ (line 542) | def __len__(self): method __getitem__ (line 545) | def __getitem__(self, idx: int): function compute_precision_recall (line 551) | def compute_precision_recall(label_df, preds_df): function parse_arguments (line 587) | def parse_arguments(): function build_cuad_query (line 650) | def build_cuad_query(dataset, mode): function main (line 677) | def main(): FILE: abacus-research/cuad_data_loader.py function load_cuad_data (line 14) | def load_cuad_data(split="test", data_dir=None): function get_unique_contracts (line 60) | def get_unique_contracts(dataset): function filter_by_contracts (line 69) | def filter_by_contracts(dataset, contract_titles): function sample_contracts (line 74) | def sample_contracts(dataset, num_contracts, seed=42): FILE: abacus-research/helper-scripts/generate-prior-stats-biodex-first-convert.py class BiodexDataset (line 26) | class BiodexDataset(pz.IterDataset): method __init__ (line 27) | def __init__( method compute_label (line 49) | def compute_label(self, entry: dict) -> dict: method term_recall (line 59) | def term_recall(preds: list | None, targets: list): method __len__ (line 89) | def __len__(self): method __getitem__ (line 92) | def __getitem__(self, idx: int): FILE: abacus-research/helper-scripts/generate-prior-stats-biodex.py class BiodexDataset (line 35) | class BiodexDataset(pz.IterDataset): method __init__ (line 36) | def __init__( method compute_label (line 58) | def compute_label(self, entry: dict) -> dict: method rank_precision_at_k (line 71) | def rank_precision_at_k(preds: list | None, targets: list, k: int): method term_recall (line 97) | def term_recall(preds: list | None, targets: list): method __len__ (line 127) | def __len__(self): method __getitem__ (line 130) | def __getitem__(self, idx: int): function search_func (line 202) | def search_func(index: chromadb.Collection, query: list[list[float]], k:... FILE: abacus-research/helper-scripts/generate-prior-stats-cuad.py function get_jaccard (line 273) | def get_jaccard(label, pred): function evaluate_entry (line 295) | def evaluate_entry(labels, preds, substr_ok): function handle_empty_preds (line 350) | def handle_empty_preds(preds): function compute_precision_recall (line 365) | def compute_precision_recall(label_df, preds_df): class CUADDataset (line 400) | class CUADDataset(pz.IterDataset): method __init__ (line 401) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:... method _construct_dataset (line 419) | def _construct_dataset(self, dataset, num_contracts, seed: int=42, inc... method __len__ (line 484) | def __len__(self): method __getitem__ (line 487) | def __getitem__(self, idx: int): method get_label_df (line 490) | def get_label_df(self): function parse_arguments (line 505) | def parse_arguments(): function build_cuad_query (line 513) | def build_cuad_query(dataset, mode): function main (line 540) | def main(): FILE: abacus-research/helper-scripts/mmqa-baseline.py function f1 (line 14) | def f1(preds: list | None, targets: list): FILE: abacus-research/mmqa-complex-demo.py function get_json_from_answer (line 57) | def get_json_from_answer(answer: str): class MMQAValidator (line 93) | class MMQAValidator(pz.Validator): method __init__ (line 94) | def __init__(self, dataset: list[dict]): method _compute_qid_to_labels (line 101) | def _compute_qid_to_labels(self) -> dict: method recall (line 121) | def recall(self, preds: list | None, targets: list): method f1 (line 151) | def f1(self, preds: list | None, targets: list): method map_score_fn (line 189) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method join_score_fn (line 196) | def join_score_fn(self, condition: str, left_input_record: dict, right... class MMQAQuestionDataset (line 213) | class MMQAQuestionDataset(pz.IterDataset): method __init__ (line 214) | def __init__(self, dataset: list[dict]): method __len__ (line 218) | def __len__(self): method __getitem__ (line 221) | def __getitem__(self, idx: int): class MMQATextDataset (line 225) | class MMQATextDataset(pz.IterDataset): method __init__ (line 226) | def __init__(self, dataset: list[dict]): method __len__ (line 245) | def __len__(self): method __getitem__ (line 248) | def __getitem__(self, idx: int): class MMQATableDataset (line 252) | class MMQATableDataset(pz.IterDataset): method __init__ (line 253) | def __init__(self, dataset: list[dict]): method __len__ (line 296) | def __len__(self): method __getitem__ (line 299) | def __getitem__(self, idx: int): class MMQAImageDataset (line 303) | class MMQAImageDataset(pz.IterDataset): method __init__ (line 304) | def __init__(self, dataset: list[dict]): method __len__ (line 341) | def __len__(self): method __getitem__ (line 344) | def __getitem__(self, idx: int): function get_dataset (line 348) | def get_dataset(split: str, shuffle: bool, seed: int, num_samples: int |... function compute_f1 (line 364) | def compute_f1(final_df, answers_df): FILE: abacus-research/mmqa-demo.py function get_json_from_answer (line 46) | def get_json_from_answer(answer: str): class MMQAValidator (line 82) | class MMQAValidator(pz.Validator): method __init__ (line 83) | def __init__( method _compute_qid_to_labels (line 113) | def _compute_qid_to_labels(self) -> dict: method recall (line 139) | def recall(self, preds: list | None, targets: list): method f1 (line 172) | def f1(self, preds: list | None, targets: list): method map_score_fn (line 213) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method topk_score_fn (line 218) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... class MMQADataset (line 235) | class MMQADataset(pz.IterDataset): method __init__ (line 236) | def __init__( method __len__ (line 265) | def __len__(self): method __getitem__ (line 268) | def __getitem__(self, idx: int): function compute_f1 (line 282) | def compute_f1(final_df, answers_df): function get_results_and_ids (line 456) | def get_results_and_ids(index: chromadb.Collection, query: list[list[flo... function text_search_func (line 482) | def text_search_func(index: chromadb.Collection, query: list[list[float]... function table_search_func (line 487) | def table_search_func(index: chromadb.Collection, query: list[list[float... function image_search_func (line 492) | def image_search_func(index: chromadb.Collection, query: list[list[float... FILE: abacus-research/score_biodex.py function compute_final_metrics (line 6) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str): FILE: abacus-research/score_cuad.py function compute_final_metrics (line 7) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str): FILE: abacus-research/score_mmqa.py function compute_final_metrics (line 6) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str): FILE: abacus-research/score_mmqa_complex.py function compute_final_metrics (line 7) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str): FILE: abacus-research/setup_cuad_data.py function setup_cuad_data (line 12) | def setup_cuad_data(): FILE: demos/audio-demo.py class SmallAudioDataset (line 8) | class SmallAudioDataset(pz.AudioFileDataset): method __init__ (line 9) | def __init__(self, *args, **kwargs): FILE: demos/caching-demo.py class TravelRequestDataset (line 169) | class TravelRequestDataset(pz.IterDataset): method __init__ (line 172) | def __init__(self, requests: List[str]): method __len__ (line 176) | def __len__(self): method __getitem__ (line 179) | def __getitem__(self, idx: int): function get_model_from_string (line 198) | def get_model_from_string(model_str: str) -> Model: function print_cache_stats (line 207) | def print_cache_stats(execution_stats): function main (line 247) | def main(): FILE: demos/demo_core.py function build_sci_paper_plan (line 30) | def build_sci_paper_plan(dataset): function build_test_pdf_plan (line 34) | def build_test_pdf_plan(dataset): function build_mit_battery_paper_plan (line 38) | def build_mit_battery_paper_plan(dataset): function build_enron_plan (line 45) | def build_enron_plan(dataset): function compute_enron_stats (line 49) | def compute_enron_stats(dataset): function enron_gby_plan (line 55) | def enron_gby_plan(dataset): function enron_count_plan (line 65) | def enron_count_plan(dataset): function enron_average_count_plan (line 75) | def enron_average_count_plan(dataset): function enron_limit_plan (line 90) | def enron_limit_plan(dataset, limit=5): function build_image_plan (line 96) | def build_image_plan(dataset): function build_image_agg_plan (line 103) | def build_image_agg_plan(dataset): function build_join_plan (line 115) | def build_join_plan(dataset1, dataset2): function build_join_image_plan (line 122) | def build_join_image_plan(dataset1, dataset2): function get_task_config (line 129) | def get_task_config(task, dataset, join_dataset=None): function execute_task (line 188) | def execute_task(task, dataset, policy, join_dataset=None, verbose=False... function format_results_table (line 206) | def format_results_table(records: list[DataRecord], cols=None): FILE: demos/enron-demo.py class EnronValidator (line 8) | class EnronValidator(pz.Validator): method __init__ (line 9) | def __init__(self, labels_file: str): method map_score_fn (line 17) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... class EnronDataset (line 27) | class EnronDataset(pz.IterDataset): method __init__ (line 28) | def __init__(self, dir: str, labels_file: str | None = None, split: st... method __len__ (line 37) | def __len__(self): method __getitem__ (line 40) | def __getitem__(self, idx: int): FILE: demos/image-demo.py function build_image_plan (line 22) | def build_image_plan(dataset): FILE: demos/join-demo.py function run_text_join (line 16) | def run_text_join(): function run_image_join (line 30) | def run_image_join(): function run_text_image_join (line 44) | def run_text_image_join(): FILE: demos/paper-demo.py function print_table (line 15) | def print_table(records, cols=None, plan_str=None): function within_two_miles_of_mit (line 50) | def within_two_miles_of_mit(record: dict): function in_price_range (line 59) | def in_price_range(record: dict): class RealEstateListingDataset (line 118) | class RealEstateListingDataset(pz.IterDataset): method __init__ (line 119) | def __init__(self, listings_dir): method __len__ (line 125) | def __len__(self): method __getitem__ (line 128) | def __getitem__(self, idx: int): FILE: demos/real-estate-demo.py function print_table (line 13) | def print_table(records, cols=None, plan_str=None): function within_two_miles_of_mit (line 48) | def within_two_miles_of_mit(record: dict): function in_price_range (line 57) | def in_price_range(record: dict): class RealEstateListingDataset (line 113) | class RealEstateListingDataset(pz.IterDataset): method __init__ (line 114) | def __init__(self, listings_dir): method __len__ (line 120) | def __len__(self): method __getitem__ (line 123) | def __getitem__(self, idx: int): FILE: demos/simple-demo.py function main (line 13) | def main(): FILE: demos/vllm-demo.py class SentimentResult (line 21) | class SentimentResult(BaseModel): function main (line 25) | def main(): FILE: evals/quest/eval.py function prepare_docs_for_query (line 11) | def prepare_docs_for_query(items: list, gt_docs: list) -> list: function palimpzest_run_query (line 22) | def palimpzest_run_query(query: dict, documents: list) -> list[str]: function main (line 56) | def main(): FILE: scripts/capture_litellm_stats.py class RawProviderStatsCapture (line 48) | class RawProviderStatsCapture(CustomLogger): method __init__ (line 57) | def __init__(self): method log_success_event (line 62) | def log_success_event(self, kwargs, response_obj, start_time, end_time): method log_failure_event (line 94) | def log_failure_event(self, kwargs, response_obj, start_time, end_time): method reset (line 100) | def reset(self): method get_captured_data (line 106) | def get_captured_data(self) -> dict[str, Any]: function load_messages (line 179) | def load_messages(modality: str, provider: str, messages_dir: str) -> li... function transform_messages_for_litellm (line 186) | def transform_messages_for_litellm(messages: list[dict]) -> list[dict]: function call_litellm_api (line 267) | def call_litellm_api( function capture_stats_for_provider (line 382) | def capture_stats_for_provider( function save_stats (line 425) | def save_stats(stats: dict[str, Any], output_dir: str, provider: str, mo... function main (line 436) | def main(): FILE: scripts/capture_provider_stats.py function detect_image_media_type (line 35) | def detect_image_media_type(base64_data: str) -> str: function load_messages (line 119) | def load_messages(modality: str, provider: str, messages_dir: str) -> li... function transform_messages_for_openai (line 126) | def transform_messages_for_openai(messages: list[dict]) -> list[dict]: function transform_messages_for_anthropic (line 228) | def transform_messages_for_anthropic(messages: list[dict]) -> tuple[str ... function transform_messages_for_gemini (line 311) | def transform_messages_for_gemini(messages: list[dict]) -> tuple[str | N... function call_openai_api (line 386) | def call_openai_api(messages: list[dict], model: str, cache_key: str | N... function call_azure_api (line 446) | def call_azure_api(messages: list[dict], model: str, cache_key: str | No... function call_anthropic_api (line 516) | def call_anthropic_api(messages: list[dict], model: str) -> dict[str, Any]: function call_gemini_api (line 565) | def call_gemini_api(messages: list[dict], model: str, use_vertex: bool =... function capture_stats_for_provider (line 668) | def capture_stats_for_provider( function save_stats (line 726) | def save_stats(stats: dict[str, Any], output_dir: str, provider: str, mo... function main (line 737) | def main(): FILE: scripts/generate_test_messages.py function generate_session_id (line 36) | def generate_session_id(provider: str, modality: str) -> str: class TextInputSchema (line 129) | class TextInputSchema(BaseModel): class ImageInputSchema (line 135) | class ImageInputSchema(BaseModel): class AudioInputSchema (line 141) | class AudioInputSchema(BaseModel): class OutputSchema (line 154) | class OutputSchema(BaseModel): function save_messages (line 255) | def save_messages(modality: str, provider: str, messages: list[dict], ou... function main (line 283) | def main(): FILE: scripts/update_model_info.py function get_free_port (line 115) | def get_free_port() -> int: function extract_provider (line 121) | def extract_provider(model_id: str) -> str: function get_api_key_env_var (line 156) | def get_api_key_env_var(provider: str) -> str | None: function generate_config_yaml (line 160) | def generate_config_yaml(model_ids: list[str]) -> str: function fetch_dynamic_model_info (line 188) | def fetch_dynamic_model_info(model_ids: list[str]) -> dict[str, Any]: function fetch_litellm_data (line 262) | def fetch_litellm_data() -> dict[str, Any]: function load_existing_data (line 275) | def load_existing_data() -> dict[str, Any]: function save_data (line 282) | def save_data(data: dict[str, Any]) -> None: function derive_model_flags_with_provider (line 295) | def derive_model_flags_with_provider(model_id: str, provider: str) -> di... function prompt_for_value (line 307) | def prompt_for_value(field_name: str, current_value: Any, value_type: st... function review_field (line 331) | def review_field( function convert_and_review_model (line 368) | def convert_and_review_model( function update_model (line 539) | def update_model( function process_models (line 572) | def process_models( function main (line 625) | def main(): FILE: src/palimpzest/agents/search_agents.py class PZBaseAgent (line 71) | class PZBaseAgent(CodeAgent): method __init__ (line 72) | def __init__(self, run_id: str, context_description: str, *args, **kwa... method write_memory_to_messages (line 87) | def write_memory_to_messages( method _generate_planning_step (line 101) | def _generate_planning_step( method _step_stream (line 243) | def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessa... method _run_stream (line 378) | def _run_stream( method run (line 445) | def run( class PZBaseManagedAgent (line 545) | class PZBaseManagedAgent(PZBaseAgent): method __call__ (line 547) | def __call__(self, task: str, **kwargs): class DataDiscoveryAgent (line 569) | class DataDiscoveryAgent(PZBaseManagedAgent): method __init__ (line 570) | def __init__(self, run_id: str, context_description: str, *args, **kwa... class SearchManagerAgent (line 604) | class SearchManagerAgent(PZBaseAgent): method __init__ (line 605) | def __init__(self, run_id: str, context_description: str, *args, **kwa... FILE: src/palimpzest/constants.py class PromptStrategy (line 12) | class PromptStrategy(str, Enum): method is_agg_prompt (line 46) | def is_agg_prompt(self): method is_filter_prompt (line 49) | def is_filter_prompt(self): method is_join_prompt (line 52) | def is_join_prompt(self): method is_map_prompt (line 55) | def is_map_prompt(self): method is_critic_prompt (line 58) | def is_critic_prompt(self): method is_refine_prompt (line 61) | def is_refine_prompt(self): method is_moa_proposer_prompt (line 64) | def is_moa_proposer_prompt(self): method is_moa_aggregator_prompt (line 67) | def is_moa_aggregator_prompt(self): method is_split_proposer_prompt (line 70) | def is_split_proposer_prompt(self): method is_split_merger_prompt (line 73) | def is_split_merger_prompt(self): method is_no_reasoning_prompt (line 76) | def is_no_reasoning_prompt(self): class Modality (line 80) | class Modality(str, Enum): class AggFunc (line 86) | class AggFunc(str, Enum): class Cardinality (line 93) | class Cardinality(str, Enum): method _missing_ (line 98) | def _missing_(cls, value): class PickOutputStrategy (line 108) | class PickOutputStrategy(str, Enum): function log_attempt_number (line 135) | def log_attempt_number(retry_state): class Model (line 191) | class Model: method __init__ (line 199) | def __init__(self, model_id: str, api_base: str | None = None, **vllm_... method _get_litellm_model_specs (line 215) | def _get_litellm_model_specs(self, model_id: str) -> dict: method __lt__ (line 257) | def __lt__(self, other): method get_all_models (line 265) | def get_all_models(cls) -> list[Model]: method value (line 269) | def value(self) -> str: method provider (line 273) | def provider(self) -> str | None: method api_key_env_var (line 278) | def api_key_env_var(self) -> str | None: method __repr__ (line 295) | def __repr__(self) -> str: method __str__ (line 298) | def __str__(self) -> str: method __eq__ (line 301) | def __eq__(self, other: object) -> bool: method __hash__ (line 308) | def __hash__(self) -> int: method is_llama_model (line 311) | def is_llama_model(self) -> bool: method is_vllm_model (line 314) | def is_vllm_model(self) -> bool: method is_embedding_model (line 317) | def is_embedding_model(self) -> bool: method is_text_image_multimodal_embedding_model (line 320) | def is_text_image_multimodal_embedding_model(self) -> bool: method is_provider_vertex_ai (line 323) | def is_provider_vertex_ai(self) -> bool: method is_provider_anthropic (line 326) | def is_provider_anthropic(self) -> bool: method is_provider_google_ai_studio (line 329) | def is_provider_google_ai_studio(self) -> bool: method is_provider_openai (line 332) | def is_provider_openai(self) -> bool: method is_provider_azure (line 335) | def is_provider_azure(self) -> bool: method is_provider_together_ai (line 338) | def is_provider_together_ai(self) -> bool: method is_provider_deepseek (line 341) | def is_provider_deepseek(self) -> bool: method is_provider_ollama (line 344) | def is_provider_ollama(self) -> bool: method is_model_gemini (line 347) | def is_model_gemini(self) -> bool: method get_model_name (line 350) | def get_model_name(self) -> str: method is_o_model (line 353) | def is_o_model(self) -> bool: method is_gpt_5_model (line 356) | def is_gpt_5_model(self) -> bool: method is_reasoning_model (line 359) | def is_reasoning_model(self) -> bool: method is_text_model (line 362) | def is_text_model(self) -> bool: method is_vision_model (line 365) | def is_vision_model(self) -> bool: method is_audio_model (line 368) | def is_audio_model(self) -> bool: method is_text_image_multimodal_model (line 371) | def is_text_image_multimodal_model(self) -> bool: method is_text_audio_multimodal_model (line 374) | def is_text_audio_multimodal_model(self) -> bool: method supports_prompt_caching (line 377) | def supports_prompt_caching(self) -> bool: method get_usd_per_input_token (line 381) | def get_usd_per_input_token(self) -> float: method get_usd_per_audio_input_token (line 384) | def get_usd_per_audio_input_token(self) -> float: method get_usd_per_image_input_token (line 388) | def get_usd_per_image_input_token(self) -> float: method get_usd_per_cache_read_token (line 391) | def get_usd_per_cache_read_token(self) -> float: method get_usd_per_audio_cache_read_token (line 394) | def get_usd_per_audio_cache_read_token(self) -> float: method get_usd_per_image_cache_read_token (line 397) | def get_usd_per_image_cache_read_token(self) -> float: method get_usd_per_cached_token_per_hour (line 401) | def get_usd_per_cached_token_per_hour(self) -> float: method get_usd_per_cache_creation_token (line 404) | def get_usd_per_cache_creation_token(self) -> float: method get_usd_per_output_token (line 407) | def get_usd_per_output_token(self) -> float: method get_usd_per_audio_cache_creation_token (line 411) | def get_usd_per_audio_cache_creation_token(self) -> float: method get_usd_per_image_cache_creation_token (line 415) | def get_usd_per_image_cache_creation_token(self) -> float: method get_seconds_per_output_token (line 418) | def get_seconds_per_output_token(self) -> float: method get_overall_score (line 421) | def get_overall_score(self) -> float: FILE: src/palimpzest/core/data/context.py class Context (line 120) | class Context(Dataset, ABC): method __init__ (line 135) | def __init__( method description (line 175) | def description(self) -> str: method materialized (line 180) | def materialized(self) -> bool: method tools (line 185) | def tools(self) -> list[Callable]: method __str__ (line 189) | def __str__(self) -> str: method set_description (line 192) | def set_description(self, description: str) -> None: method set_materialized (line 198) | def set_materialized(self, materialized: str) -> None: method compute (line 204) | def compute(self, instruction: str) -> Context: method search (line 221) | def search(self, search_query: str) -> Context: class TextFileContext (line 236) | class TextFileContext(Context): method __init__ (line 237) | def __init__(self, path: str, id: str, description: str) -> None: method _check_filter_answer_text (line 271) | def _check_filter_answer_text(self, answer_text: str) -> dict | None: method _parse_filter_answer (line 288) | def _parse_filter_answer(self, completion_text: str) -> dict[str, list]: method tool_execute_semantic_operators (line 347) | def tool_execute_semantic_operators(self, instruction: str) -> str: FILE: src/palimpzest/core/data/context_manager.py class ContextNotFoundError (line 14) | class ContextNotFoundError(Exception): class ContextManager (line 18) | class ContextManager: method __init__ (line 24) | def __init__(self): method from_pkl (line 45) | def from_pkl(path: str) -> context.Context: method to_pkl (line 53) | def to_pkl(context: context.Context, path: str) -> None: method num_tokens_from_string (line 58) | def num_tokens_from_string(self, string: str, encoding_name: str) -> int: method add_context (line 64) | def add_context(self, context: context.Context, update: bool = False) ... method update_context (line 101) | def update_context(self, id: str, description: str, materialized: bool... method get_context (line 119) | def get_context(self, id: str) -> context.Context: method search_context (line 135) | def search_context(self, query: str, k: int = 1, where: dict | None = ... FILE: src/palimpzest/core/data/dataset.py class Dataset (line 36) | class Dataset: method __init__ (line 66) | def __init__( method id (line 105) | def id(self) -> str: method schema (line 110) | def schema(self) -> type[BaseModel]: method is_root (line 115) | def is_root(self) -> bool: method __str__ (line 118) | def __str__(self) -> str: method __iter__ (line 121) | def __iter__(self) -> Iterator[Dataset]: method _compute_dataset_id (line 126) | def _compute_dataset_id(self) -> str: method _set_root_datasets (line 136) | def _set_root_datasets(self, new_root_datasets: dict[str, Dataset]) ->... method _generate_unique_logical_op_ids (line 154) | def _generate_unique_logical_op_ids(self, topo_idx: int | None = None)... method _resolve_depends_on (line 178) | def _resolve_depends_on(self, depends_on: list[str]) -> list[str]: method _get_root_datasets (line 184) | def _get_root_datasets(self) -> dict[str, Dataset]: method relax_types (line 196) | def relax_types(self) -> None: method get_upstream_datasets (line 211) | def get_upstream_datasets(self) -> list[Dataset]: method get_limit (line 222) | def get_limit(self) -> int | None: method copy (line 238) | def copy(self): method join (line 246) | def join(self, other: Dataset, on: str | list[str], how: str = "inner"... method sem_join (line 269) | def sem_join(self, other: Dataset, condition: str, desc: str | None = ... method filter (line 292) | def filter( method sem_filter (line 317) | def sem_filter( method _sem_map (line 340) | def _sem_map(self, cols: list[dict] | type[BaseModel] | None, method sem_add_columns (line 373) | def sem_add_columns(self, cols: list[dict] | type[BaseModel], method sem_map (line 402) | def sem_map(self, cols: list[dict] | type[BaseModel], desc: str | None... method sem_flat_map (line 416) | def sem_flat_map(self, cols: list[dict] | type[BaseModel], desc: str |... method _map (line 432) | def _map(self, udf: Callable, method add_columns (line 464) | def add_columns(self, udf: Callable, method map (line 502) | def map(self, udf: Callable, method flat_map (line 526) | def flat_map(self, udf: Callable, method count (line 550) | def count(self) -> Dataset: method average (line 555) | def average(self) -> Dataset: method sum (line 560) | def sum(self) -> Dataset: method min (line 565) | def min(self) -> Dataset: method max (line 570) | def max(self) -> Dataset: method groupby (line 575) | def groupby(self, groupby: GroupBySig) -> Dataset: method sem_agg (line 580) | def sem_agg(self, col: dict | type[BaseModel], agg: str, depends_on: s... method sem_topk (line 611) | def sem_topk( method limit (line 650) | def limit(self, n: int) -> Dataset: method distinct (line 655) | def distinct(self, distinct_cols: list[str] | None = None) -> Dataset: method project (line 660) | def project(self, project_cols: list[str] | str) -> Dataset: method run (line 667) | def run(self, config: QueryProcessorConfig | None = None, **kwargs): method optimize_and_run (line 682) | def optimize_and_run(self, config: QueryProcessorConfig | None = None,... FILE: src/palimpzest/core/data/index_dataset.py function index_factory (line 8) | def index_factory(index: Collection) -> PZIndex: class BaseIndex (line 24) | class BaseIndex(ABC): method __init__ (line 26) | def __init__(self, index: Collection): method __str__ (line 29) | def __str__(self): method search (line 36) | def search(self, query_embedding: list[float] | list[list[float]], res... class ChromaIndex (line 53) | class ChromaIndex(BaseIndex): method __init__ (line 54) | def __init__(self, index: Collection): FILE: src/palimpzest/core/data/iter_dataset.py class IterDataset (line 33) | class IterDataset(dataset.Dataset, ABC): method __init__ (line 42) | def __init__(self, id: str, schema: type[BaseModel] | list[dict]) -> N... method __len__ (line 55) | def __len__(self) -> int: method __getitem__ (line 60) | def __getitem__(self, idx: int) -> dict: class BaseFileDataset (line 79) | class BaseFileDataset(IterDataset): method __init__ (line 85) | def __init__(self, path: str, **kwargs) -> None: method __len__ (line 110) | def __len__(self) -> int: class BaseFileDirectoryDataset (line 114) | class BaseFileDirectoryDataset(IterDataset): method __init__ (line 120) | def __init__(self, path: str, **kwargs) -> None: method __len__ (line 146) | def __len__(self) -> int: class MemoryDataset (line 152) | class MemoryDataset(IterDataset): method __init__ (line 162) | def __init__(self, id: str, vals: list | pd.DataFrame, schema: type[Ba... method __len__ (line 177) | def __len__(self) -> int: method __getitem__ (line 180) | def __getitem__(self, idx: int) -> dict: class HTMLFileDataset (line 216) | class HTMLFileDataset(BaseFileDataset): method __init__ (line 221) | def __init__(self, id: str, path: str) -> None: method _html_to_text_with_links (line 232) | def _html_to_text_with_links(self, html: str) -> str: method __getitem__ (line 248) | def __getitem__(self, idx: int) -> dict: class ImageFileDataset (line 284) | class ImageFileDataset(BaseFileDataset): method __init__ (line 289) | def __init__(self, id: str, path: str) -> None: method __getitem__ (line 300) | def __getitem__(self, idx: int) -> dict: class PDFFileDataset (line 326) | class PDFFileDataset(BaseFileDataset): method __init__ (line 333) | def __init__( method __getitem__ (line 354) | def __getitem__(self, idx: int) -> dict: class TextFileDataset (line 385) | class TextFileDataset(BaseFileDataset): method __init__ (line 390) | def __init__(self, id: str, path: str) -> None: method __getitem__ (line 400) | def __getitem__(self, idx: int) -> dict: class XLSFileDataset (line 425) | class XLSFileDataset(BaseFileDataset): method __init__ (line 430) | def __init__(self, id: str, path: str) -> None: method __getitem__ (line 437) | def __getitem__(self, idx: int) -> dict: class AudioFileDataset (line 472) | class AudioFileDataset(BaseFileDirectoryDataset): method __init__ (line 477) | def __init__(self, id: str, path: str) -> None: method __getitem__ (line 488) | def __getitem__(self, idx: int) -> dict: function get_local_source (line 514) | def get_local_source(id: str, path: str | Path, **kwargs) -> dataset.Dat... function resolve_datasource (line 542) | def resolve_datasource(id: str, source: str | Path | list | pd.DataFrame... FILE: src/palimpzest/core/elements/filters.py class Filter (line 11) | class Filter: method __init__ (line 14) | def __init__(self, filter_condition: str | None = None, filter_fn: Cal... method serialize (line 18) | def serialize(self) -> dict[str, Any]: method get_filter_str (line 24) | def get_filter_str(self) -> str: method __repr__ (line 27) | def __repr__(self) -> str: method __hash__ (line 30) | def __hash__(self) -> int: method __eq__ (line 34) | def __eq__(self, other) -> bool: method __str__ (line 42) | def __str__(self) -> str: FILE: src/palimpzest/core/elements/groupbysig.py class GroupBySig (line 21) | class GroupBySig: method __init__ (line 22) | def __init__(self, group_by_fields: list[str], agg_funcs: list[str], a... method validate_schema (line 27) | def validate_schema(self, input_schema: type[BaseModel]) -> tuple[bool... method serialize (line 36) | def serialize(self) -> dict[str, Any]: method __str__ (line 44) | def __str__(self) -> str: method __hash__ (line 47) | def __hash__(self) -> int: method __eq__ (line 51) | def __eq__(self, other) -> bool: method get_agg_field_names (line 55) | def get_agg_field_names(self) -> list[str]: method output_schema (line 62) | def output_schema(self) -> type[BaseModel]: FILE: src/palimpzest/core/elements/records.py class DataRecord (line 28) | class DataRecord: method __init__ (line 31) | def __init__( method __setattr__ (line 89) | def __setattr__(self, name: str, value: Any, /) -> None: method __getattr__ (line 96) | def __getattr__(self, name: str) -> Any: method __getitem__ (line 100) | def __getitem__(self, field: str) -> Any: method __setitem__ (line 104) | def __setitem__(self, field: str, value: Any) -> None: method __str__ (line 108) | def __str__(self, truncate: int | None = 15) -> str: method __repr__ (line 116) | def __repr__(self) -> str: method __eq__ (line 120) | def __eq__(self, other): method __hash__ (line 124) | def __hash__(self): method __iter__ (line 128) | def __iter__(self): method get_field_names (line 132) | def get_field_names(self): method get_field_type (line 136) | def get_field_type(self, field_name: str) -> FieldInfo: method schema (line 140) | def schema(self) -> type[BaseModel]: method copy (line 143) | def copy(self) -> DataRecord: method from_parent (line 164) | def from_parent( method from_agg_parents (line 208) | def from_agg_parents( method from_join_parents (line 229) | def from_join_parents( method to_df (line 278) | def to_df(records: list[DataRecord], project_cols: list[str] | None = ... method to_json_str (line 297) | def to_json_str(self, include_bytes: bool = True, bytes_to_str: bool =... method to_dict (line 302) | def to_dict(self, include_bytes: bool = True, bytes_to_str: bool = Fal... class DataRecordSet (line 343) | class DataRecordSet: method __init__ (line 351) | def __init__( method get_total_cost (line 378) | def get_total_cost(self) -> float: method get_field_to_score_fn (line 381) | def get_field_to_score_fn(self) -> dict[str, str | callable]: method __getitem__ (line 384) | def __getitem__(self, slice) -> DataRecord | list[DataRecord]: method __len__ (line 387) | def __len__(self) -> int: method __iter__ (line 390) | def __iter__(self) -> Generator[DataRecord]: class DataRecordCollection (line 394) | class DataRecordCollection: method __init__ (line 410) | def __init__(self, data_records: list[DataRecord], execution_stats: Ex... method __iter__ (line 416) | def __iter__(self) -> Generator[DataRecord]: method __len__ (line 420) | def __len__(self): method to_df (line 424) | def to_df(self, cols: list[str] | None = None): method _get_executed_plans (line 427) | def _get_executed_plans(self): FILE: src/palimpzest/core/lib/schemas.py function get_schema_field_names (line 60) | def get_schema_field_names(schema: type[BaseModel], id: str | None = Non... function _create_pickleable_model (line 65) | def _create_pickleable_model(fields: dict[str, tuple[type, FieldInfo]]) ... function relax_schema (line 90) | def relax_schema(model: type[BaseModel]) -> type[BaseModel]: function project (line 99) | def project(model: type[BaseModel], project_fields: list[str]) -> type[B... function create_schema_from_fields (line 114) | def create_schema_from_fields(fields: list[dict]) -> type[BaseModel]: function create_schema_from_df (line 132) | def create_schema_from_df(df: pd.DataFrame) -> type[BaseModel]: function union_schemas (line 145) | def union_schemas(models: list[type[BaseModel]], join: bool = False, on:... class DefaultSchema (line 181) | class DefaultSchema(BaseModel): class Download (line 185) | class Download(BaseModel): class File (line 191) | class File(BaseModel): class TextFile (line 200) | class TextFile(BaseModel): class Average (line 205) | class Average(BaseModel): class Count (line 208) | class Count(BaseModel): class Sum (line 211) | class Sum(BaseModel): class Min (line 214) | class Min(BaseModel): class Max (line 217) | class Max(BaseModel): class OperatorDerivedSchema (line 220) | class OperatorDerivedSchema(BaseModel): class Table (line 223) | class Table(BaseModel): class URL (line 230) | class URL(BaseModel): class WebPage (line 234) | class WebPage(BaseModel): class ImageFile (line 242) | class ImageFile(File): class AudioFile (line 246) | class AudioFile(File): class PDFFile (line 250) | class PDFFile(File): class XLSFile (line 255) | class XLSFile(File): class EquationImage (line 261) | class EquationImage(ImageFile): class PlotImage (line 265) | class PlotImage(ImageFile): FILE: src/palimpzest/core/models.py class GenerationStats (line 11) | class GenerationStats(BaseModel): method __iadd__ (line 59) | def __iadd__(self, other: GenerationStats) -> GenerationStats: method __add__ (line 66) | def __add__(self, other: GenerationStats) -> GenerationStats: method __itruediv__ (line 76) | def __itruediv__(self, quotient: float) -> GenerationStats: method __truediv__ (line 87) | def __truediv__(self, quotient: float) -> GenerationStats: method __radd__ (line 100) | def __radd__(self, other: int) -> GenerationStats: method to_json (line 106) | def to_json(self, filepath: str | None = None) -> dict | None: class RecordOpStats (line 114) | class RecordOpStats(BaseModel): class OperatorStats (line 227) | class OperatorStats(BaseModel): method __iadd__ (line 280) | def __iadd__(self, stats: OperatorStats | RecordOpStats) -> OperatorSt... class BasePlanStats (line 323) | class BasePlanStats(BaseModel): method start (line 385) | def start(self) -> None: method finish (line 389) | def finish(self) -> None: method from_plan (line 405) | def from_plan(plan) -> BasePlanStats: method sum_op_stats_field (line 412) | def sum_op_stats_field(self, field_name: str) -> float | int: method sum_validation_stats_field (line 416) | def sum_validation_stats_field(self, field_name: str) -> float | int: method add_record_op_stats (line 421) | def add_record_op_stats(self, unique_full_op_id: str, record_op_stats:... method __iadd__ (line 428) | def __iadd__(self, plan_stats: BasePlanStats) -> None: method __str__ (line 435) | def __str__(self) -> str: method get_total_cost_so_far (line 441) | def get_total_cost_so_far(self) -> float: class PlanStats (line 448) | class PlanStats(BasePlanStats): method from_plan (line 453) | def from_plan(plan) -> PlanStats: method sum_op_stats_field (line 471) | def sum_op_stats_field(self, field_name: str) -> float | int: method add_record_op_stats (line 475) | def add_record_op_stats(self, unique_full_op_id: str, record_op_stats:... method __iadd__ (line 489) | def __iadd__(self, plan_stats: PlanStats) -> None: method __str__ (line 512) | def __str__(self) -> str: class SentinelPlanStats (line 527) | class SentinelPlanStats(BasePlanStats): method from_plan (line 532) | def from_plan(plan) -> SentinelPlanStats: method sum_op_stats_field (line 552) | def sum_op_stats_field(self, field_name: str) -> float | int: method add_record_op_stats (line 556) | def add_record_op_stats(self, unique_logical_op_id: str, record_op_sta... method add_validation_gen_stats (line 574) | def add_validation_gen_stats(self, unique_logical_op_id: str, gen_stat... method __iadd__ (line 583) | def __iadd__(self, plan_stats: SentinelPlanStats) -> None: method __str__ (line 616) | def __str__(self) -> str: class ExecutionStats (line 635) | class ExecutionStats(BaseModel): method start (line 700) | def start(self) -> None: method finish_optimization (line 704) | def finish_optimization(self) -> None: method finish (line 717) | def finish(self) -> None: method sum_plan_stats_field (line 747) | def sum_plan_stats_field(self, field_name: str) -> float | int: method sum_sentinel_plan_costs (line 755) | def sum_sentinel_plan_costs(self) -> float: method sum_plan_costs (line 764) | def sum_plan_costs(self) -> float: method add_plan_stats (line 770) | def add_plan_stats(self, plan_stats: PlanStats | SentinelPlanStats | l... method to_json (line 794) | def to_json(self, filepath: str | None = None) -> dict | None: class OperatorCostEstimates (line 802) | class OperatorCostEstimates(BaseModel): method __rmul__ (line 843) | def __rmul__(self, multiplier: float) -> OperatorCostEstimates: method model_post_init (line 850) | def model_post_init(self, __context: Any) -> None: class PlanCost (line 868) | class PlanCost(BaseModel): method __hash__ (line 903) | def __hash__(self): method __eq__ (line 906) | def __eq__(self, other: Any) -> bool: method model_post_init (line 915) | def model_post_init(self, __context: Any) -> None: method join_add (line 928) | def join_add(self, left_plan_cost: PlanCost, right_plan_cost: PlanCost... method __iadd__ (line 969) | def __iadd__(self, other: PlanCost) -> PlanCost: method __add__ (line 989) | def __add__(self, other: PlanCost) -> PlanCost: FILE: src/palimpzest/policy.py function construct_policy_from_kwargs (line 8) | def construct_policy_from_kwargs(**kwargs) -> Policy | None: class Policy (line 64) | class Policy: method __init__ (line 73) | def __init__(self): method get_primary_metric (line 76) | def get_primary_metric(self) -> str: method get_dict (line 87) | def get_dict(self) -> dict: method constraint (line 94) | def constraint(self, plan: PlanCost) -> bool: method choose (line 101) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: method to_json_str (line 107) | def to_json_str(self) -> str: class MaxQuality (line 115) | class MaxQuality(Policy): method __str__ (line 121) | def __str__(self): method get_primary_metric (line 124) | def get_primary_metric(self) -> str: method get_dict (line 127) | def get_dict(self) -> dict: method constraint (line 130) | def constraint(self, plan: PlanCost) -> bool: method choose (line 134) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: class MinCost (line 147) | class MinCost(Policy): method __str__ (line 153) | def __str__(self): method get_primary_metric (line 156) | def get_primary_metric(self) -> str: method get_dict (line 159) | def get_dict(self) -> dict: method constraint (line 162) | def constraint(self, plan: PlanCost) -> bool: method choose (line 166) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: class MinTime (line 179) | class MinTime(Policy): method __str__ (line 185) | def __str__(self): method get_primary_metric (line 188) | def get_primary_metric(self) -> str: method get_dict (line 191) | def get_dict(self) -> dict: method constraint (line 194) | def constraint(self, plan: PlanCost) -> bool: method choose (line 198) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: class MaxQualityAtFixedCost (line 211) | class MaxQualityAtFixedCost(Policy): method __init__ (line 217) | def __init__(self, max_cost: float): method __str__ (line 220) | def __str__(self): method get_primary_metric (line 223) | def get_primary_metric(self) -> str: method get_dict (line 226) | def get_dict(self) -> dict: method constraint (line 229) | def constraint(self, plan: PlanCost) -> bool: method choose (line 232) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: class MaxQualityAtFixedTime (line 245) | class MaxQualityAtFixedTime(Policy): method __init__ (line 251) | def __init__(self, max_time: float): method __str__ (line 254) | def __str__(self): method get_primary_metric (line 257) | def get_primary_metric(self) -> str: method get_dict (line 260) | def get_dict(self) -> dict: method constraint (line 263) | def constraint(self, plan: PlanCost) -> bool: method choose (line 266) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: class MinCostAtFixedQuality (line 279) | class MinCostAtFixedQuality(Policy): method __init__ (line 285) | def __init__(self, min_quality: float): method __str__ (line 288) | def __str__(self): method get_primary_metric (line 291) | def get_primary_metric(self) -> str: method get_dict (line 294) | def get_dict(self) -> dict: method constraint (line 297) | def constraint(self, plan: PlanCost) -> bool: method choose (line 300) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: class MinTimeAtFixedQuality (line 313) | class MinTimeAtFixedQuality(Policy): method __init__ (line 319) | def __init__(self, min_quality: float): method __str__ (line 322) | def __str__(self): method get_primary_metric (line 325) | def get_primary_metric(self) -> str: method get_dict (line 328) | def get_dict(self) -> dict: method constraint (line 331) | def constraint(self, plan: PlanCost) -> bool: method choose (line 334) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float: FILE: src/palimpzest/prompts/prompt_factory.py function _detect_image_media_type (line 144) | def _detect_image_media_type(filepath: str | None = None, base64_data: s... class PromptFactory (line 170) | class PromptFactory: method __init__ (line 232) | def __init__(self, prompt_strategy: PromptStrategy, model: Model, card... method _get_context (line 238) | def _get_context(self, candidate: DataRecord | list[DataRecord], input... method _get_input_fields (line 289) | def _get_input_fields(self, candidate: DataRecord, **kwargs) -> list[s... method _get_input_modalities (line 308) | def _get_input_modalities(self, candidate: DataRecord, input_fields: l... method _get_modalities_str (line 331) | def _get_modalities_str(self, input_modalities: set[Modality]) -> str: method _get_input_fields_desc (line 356) | def _get_input_fields_desc(self, candidate: DataRecord, input_fields: ... method _get_output_fields_desc (line 372) | def _get_output_fields_desc(self, output_fields: list[str], **kwargs) ... method _get_agg_instruction (line 395) | def _get_agg_instruction(self, **kwargs) -> str | None: method _get_filter_condition (line 408) | def _get_filter_condition(self, **kwargs) -> str | None: method _get_join_condition (line 421) | def _get_join_condition(self, **kwargs) -> str | None: method _get_original_output (line 434) | def _get_original_output(self, **kwargs) -> str | None: method _get_critique_output (line 452) | def _get_critique_output(self, **kwargs) -> str | None: method _get_model_responses (line 468) | def _get_model_responses(self, **kwargs) -> str | None: method _get_chunk_outputs (line 487) | def _get_chunk_outputs(self, **kwargs) -> str | None: method _get_output_format_instruction (line 506) | def _get_output_format_instruction(self) -> str: method _get_job_instruction (line 519) | def _get_job_instruction(self, input_modalities: set[Modality]) -> str... method _get_desc_section (line 549) | def _get_desc_section(self) -> str: method _get_critique_criteria (line 562) | def _get_critique_criteria(self) -> str | None: method _get_refinement_criteria (line 575) | def _get_refinement_criteria(self) -> str | None: method _get_finish_instruction (line 588) | def _get_finish_instruction(self) -> str | None: method _get_example_input_fields (line 603) | def _get_example_input_fields(self, input_modalities: set[Modality], r... method _get_example_output_fields (line 627) | def _get_example_output_fields(self, input_modalities: set[Modality]) ... method _get_example_context (line 650) | def _get_example_context(self, input_modalities: set[Modality], right:... method _get_image_disclaimer (line 684) | def _get_image_disclaimer(self, input_modalities: set[Modality], right... method _get_audio_disclaimer (line 697) | def _get_audio_disclaimer(self, input_modalities: set[Modality], right... method _get_example_reasoning (line 710) | def _get_example_reasoning(self, input_modalities: set[Modality]) -> str: method _get_example_answer (line 737) | def _get_example_answer(self, input_modalities: set[Modality]) -> str: method _get_all_format_kwargs (line 763) | def _get_all_format_kwargs( method _create_audio_messages (line 837) | def _create_audio_messages(self, candidate: DataRecord | list[DataReco... method _create_image_messages (line 893) | def _create_image_messages(self, candidate: DataRecord | list[DataReco... method _get_system_prompt (line 963) | def _get_system_prompt(self, **format_kwargs) -> str | None: method _get_user_messages (line 980) | def _get_user_messages(self, candidate: DataRecord | list[DataRecord],... method create_messages (line 1074) | def create_messages(self, candidate: DataRecord | list[DataRecord], ou... FILE: src/palimpzest/prompts/prompt_manager.py class PromptManager (line 17) | class PromptManager: method __init__ (line 30) | def __init__(self, model: Model): method get_cache_kwargs (line 35) | def get_cache_kwargs(self) -> dict[str, Any]: method inject_cache_isolation_id (line 51) | def inject_cache_isolation_id(self, messages: list[dict], session_id: ... method update_messages_for_caching (line 65) | def update_messages_for_caching(self, messages: list[dict]) -> list[di... method extract_usage_stats (line 92) | def extract_usage_stats(self, usage: dict, is_audio_op: bool) -> dict[... method _remove_cache_boundary_markers (line 149) | def _remove_cache_boundary_markers(self, messages: list[dict]) -> list... method _transform_messages_for_anthropic (line 173) | def _transform_messages_for_anthropic(self, messages: list[dict]) -> l... FILE: src/palimpzest/query/execution/all_sample_execution_strategy.py class OpSet (line 20) | class OpSet: method __init__ (line 29) | def __init__(self, op_set: list[PhysicalOperator], source_unique_logic... method get_op_inputs (line 48) | def get_op_inputs(self) -> list[PhysicalOperator, DataRecord | int | N... method pick_highest_quality_output (line 103) | def pick_highest_quality_output(self, record_sets: list[DataRecordSet]... method update_inputs (line 141) | def update_inputs(self, source_idx_to_record_sets: dict[int, DataRecor... class AllSamplingExecutionStrategy (line 153) | class AllSamplingExecutionStrategy(SentinelExecutionStrategy): method _execute_sentinel_plan (line 155) | def _execute_sentinel_plan(self, method execute_sentinel_plan (line 208) | def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dic... FILE: src/palimpzest/query/execution/execution_strategy.py class BaseExecutionStrategy (line 25) | class BaseExecutionStrategy: method __init__ (line 26) | def __init__(self, class ExecutionStrategy (line 43) | class ExecutionStrategy(BaseExecutionStrategy, ABC): method __init__ (line 46) | def __init__(self, *args, **kwargs): method execute_plan (line 52) | def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], ... method _create_input_queues (line 56) | def _create_input_queues(self, plan: PhysicalPlan) -> dict[str, dict[s... class SentinelExecutionStrategy (line 77) | class SentinelExecutionStrategy(BaseExecutionStrategy, ABC): method __init__ (line 83) | def __init__( method _score_quality (line 117) | def _score_quality( method _execute_op_set (line 275) | def _execute_op_set(self, unique_logical_op_id: str, op_inputs: list[t... method _is_llm_op (line 345) | def _is_llm_op(self, physical_op: PhysicalOperator) -> bool: method execute_sentinel_plan (line 353) | def execute_sentinel_plan(self, sentinel_plan: SentinelPlan, train_dat... FILE: src/palimpzest/query/execution/execution_strategy_type.py class ExecutionStrategyType (line 12) | class ExecutionStrategyType(Enum): method is_fully_parallel (line 18) | def is_fully_parallel(self) -> bool: class SentinelExecutionStrategyType (line 22) | class SentinelExecutionStrategyType(Enum): FILE: src/palimpzest/query/execution/mab_execution_strategy.py class OpFrontier (line 27) | class OpFrontier: method __init__ (line 36) | def __init__( method get_frontier_ops (line 96) | def get_frontier_ops(self) -> list[PhysicalOperator]: method get_off_frontier_ops (line 102) | def get_off_frontier_ops(self) -> list[PhysicalOperator]: method _compute_op_id_to_pareto_distance (line 108) | def _compute_op_id_to_pareto_distance(self, priors: dict[str, dict[str... method _compute_naive_priors (line 168) | def _compute_naive_priors(self, op_set: list[PhysicalOperator]) -> dic... method _get_op_index_order (line 191) | def _get_op_index_order(self, op_set: list[PhysicalOperator], seed: in... method _get_op_source_indices_pairs (line 258) | def _get_op_source_indices_pairs(self) -> list[tuple[PhysicalOperator,... method get_source_indices_for_next_iteration (line 282) | def get_source_indices_for_next_iteration(self) -> set[tuple[str]]: method get_frontier_op_inputs (line 289) | def get_frontier_op_inputs(self, source_indices_to_sample: set[tuple[s... method update_frontier (line 369) | def update_frontier(self, unique_logical_op_id: str, plan_stats: Senti... method pick_highest_quality_output (line 581) | def pick_highest_quality_output(self, record_sets: list[DataRecordSet]... method update_inputs (line 619) | def update_inputs(self, source_unique_logical_op_id: str, source_indic... class MABExecutionStrategy (line 631) | class MABExecutionStrategy(SentinelExecutionStrategy): method _remove_filtered_records_from_downstream_ops (line 639) | def _remove_filtered_records_from_downstream_ops(self, topo_idx: int, ... method _get_max_quality_op (line 661) | def _get_max_quality_op(self, unique_logical_op_id: str, op_frontiers:... method _compute_termination_condition (line 692) | def _compute_termination_condition(self, samples_drawn: int, sampling_... method _execute_sentinel_plan (line 695) | def _execute_sentinel_plan( method execute_sentinel_plan (line 791) | def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dic... FILE: src/palimpzest/query/execution/parallel_execution_strategy.py class ParallelExecutionStrategy (line 19) | class ParallelExecutionStrategy(ExecutionStrategy): method __init__ (line 24) | def __init__(self, *args, **kwargs): method _any_queue_not_empty (line 27) | def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dic... method _upstream_ops_finished (line 37) | def _upstream_ops_finished(self, plan: PhysicalPlan, unique_full_op_id... method _finish_outer_join (line 44) | def _finish_outer_join(self, executor: ThreadPoolExecutor, plan: Physi... method _process_future_results (line 58) | def _process_future_results(self, unique_full_op_id: str, future_queue... method _execute_plan (line 101) | def _execute_plan( method execute_plan (line 235) | def execute_plan(self, plan: PhysicalPlan): FILE: src/palimpzest/query/execution/single_threaded_execution_strategy.py class SequentialSingleThreadExecutionStrategy (line 15) | class SequentialSingleThreadExecutionStrategy(ExecutionStrategy): method __init__ (line 25) | def __init__(self, *args, **kwargs): method _execute_plan (line 29) | def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, di... method execute_plan (line 116) | def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], ... class PipelinedSingleThreadExecutionStrategy (line 149) | class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy): method __init__ (line 163) | def __init__(self, *args, **kwargs): method _any_queue_not_empty (line 167) | def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dic... method _upstream_ops_finished (line 177) | def _upstream_ops_finished(self, plan: PhysicalPlan, unique_full_op_id... method _execute_plan (line 184) | def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, di... method execute_plan (line 284) | def execute_plan(self, plan: PhysicalPlan): FILE: src/palimpzest/query/generators/gemini_client.py class GeminiResponse (line 24) | class GeminiResponse: class GeminiClient (line 31) | class GeminiClient: method get_instance (line 57) | def get_instance(cls, model: str, use_vertex: bool = False) -> GeminiC... method __init__ (line 64) | def __init__(self, model: str, use_vertex: bool = False): method _detect_image_media_type (line 70) | def _detect_image_media_type(self, base64_data: str) -> str: method _transform_messages (line 86) | def _transform_messages(self, messages: list[dict]) -> tuple[str | Non... method _extract_usage_stats (line 182) | def _extract_usage_stats(self, usage_metadata: Any) -> dict: method generate (line 243) | def generate( FILE: src/palimpzest/query/generators/generators.py function get_json_from_answer (line 34) | def get_json_from_answer(answer: str, model: Model, cardinality: Cardina... class Generator (line 98) | class Generator(Generic[ContextType, InputType]): method __init__ (line 103) | def __init__( method _parse_reasoning (line 131) | def _parse_reasoning(self, completion_text: str, **kwargs) -> str: method _prepare_field_answers (line 149) | def _prepare_field_answers(self, field_answers: dict | list[dict], fie... method _check_convert_answer_text (line 171) | def _check_convert_answer_text(self, answer_text: str, fields: dict[st... method _check_bool_answer_text (line 188) | def _check_bool_answer_text(self, answer_text: str, throw_exception: b... method _parse_convert_answer (line 206) | def _parse_convert_answer(self, completion_text: str, fields: dict[str... method _parse_bool_answer (line 245) | def _parse_bool_answer(self, completion_text: str, json_output: bool) ... method _parse_answer (line 284) | def _parse_answer(self, completion_text: str, fields: dict[str, FieldI... method __call__ (line 303) | def __call__(self, candidate: DataRecord | list[DataRecord], fields: d... FILE: src/palimpzest/query/operators/aggregate.py class AggregateOp (line 23) | class AggregateOp(PhysicalOperator): method __call__ (line 29) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class ApplyGroupByOp (line 33) | class ApplyGroupByOp(AggregateOp): method __init__ (line 39) | def __init__(self, group_by_sig: GroupBySig, *args, **kwargs): method __str__ (line 43) | def __str__(self): method get_id_params (line 48) | def get_id_params(self): method get_op_params (line 52) | def get_op_params(self): method naive_cost_estimates (line 56) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method agg_init (line 66) | def agg_init(func): method agg_merge (line 85) | def agg_merge(func, state, val): method agg_final (line 115) | def agg_final(func, state): method __call__ (line 124) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class AverageAggregateOp (line 192) | class AverageAggregateOp(AggregateOp): method __init__ (line 195) | def __init__(self, agg_func: AggFunc, *args, **kwargs): method __str__ (line 215) | def __str__(self): method get_id_params (line 220) | def get_id_params(self): method get_op_params (line 224) | def get_op_params(self): method naive_cost_estimates (line 228) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 237) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class SumAggregateOp (line 271) | class SumAggregateOp(AggregateOp): method __init__ (line 274) | def __init__(self, agg_func: AggFunc, *args, **kwargs): method __str__ (line 294) | def __str__(self): method get_id_params (line 299) | def get_id_params(self): method get_op_params (line 303) | def get_op_params(self): method naive_cost_estimates (line 307) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 316) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class CountAggregateOp (line 347) | class CountAggregateOp(AggregateOp): method __init__ (line 350) | def __init__(self, agg_func: AggFunc, *args, **kwargs): method __str__ (line 358) | def __str__(self): method get_id_params (line 363) | def get_id_params(self): method get_op_params (line 367) | def get_op_params(self): method naive_cost_estimates (line 371) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 380) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class MinAggregateOp (line 404) | class MinAggregateOp(AggregateOp): method __init__ (line 407) | def __init__(self, agg_func: AggFunc, *args, **kwargs): method __str__ (line 415) | def __str__(self): method get_id_params (line 420) | def get_id_params(self): method get_op_params (line 424) | def get_op_params(self): method naive_cost_estimates (line 428) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 437) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class MaxAggregateOp (line 467) | class MaxAggregateOp(AggregateOp): method __init__ (line 470) | def __init__(self, agg_func: AggFunc, *args, **kwargs): method __str__ (line 478) | def __str__(self): method get_id_params (line 483) | def get_id_params(self): method get_op_params (line 487) | def get_op_params(self): method naive_cost_estimates (line 491) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 500) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: class SemanticAggregate (line 531) | class SemanticAggregate(AggregateOp): method __init__ (line 533) | def __init__(self, agg_str: str, model: Model, prompt_strategy: Prompt... method __str__ (line 543) | def __str__(self): method get_id_params (line 550) | def get_id_params(self): method get_op_params (line 562) | def get_op_params(self): method get_model_name (line 574) | def get_model_name(self) -> str: method naive_cost_estimates (line 577) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 611) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet: FILE: src/palimpzest/query/operators/compute.py function make_tool (line 17) | def make_tool(bound_method): class SmolAgentsCompute (line 38) | class SmolAgentsCompute(PhysicalOperator): method __init__ (line 41) | def __init__(self, context_id: str, instruction: str, additional_conte... method __str__ (line 52) | def __str__(self): method get_id_params (line 59) | def get_id_params(self): method get_op_params (line 68) | def get_op_params(self): method naive_cost_estimates (line 77) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method _create_record_set (line 85) | def _create_record_set( method __call__ (line 129) | def __call__(self, candidate: DataRecord) -> Any: FILE: src/palimpzest/query/operators/convert.py class ConvertOp (line 23) | class ConvertOp(PhysicalOperator, ABC): method __init__ (line 24) | def __init__( method get_id_params (line 37) | def get_id_params(self): method get_op_params (line 48) | def get_op_params(self): method _create_data_records_from_field_answers (line 59) | def _create_data_records_from_field_answers( method _create_record_set (line 90) | def _create_record_set( method convert (line 142) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... method __call__ (line 163) | def __call__(self, candidate: DataRecord) -> DataRecordSet: class NonLLMConvert (line 198) | class NonLLMConvert(ConvertOp): method __str__ (line 199) | def __str__(self): method naive_cost_estimates (line 204) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method convert (line 224) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... class LLMConvert (line 262) | class LLMConvert(ConvertOp): method __init__ (line 267) | def __init__( method __str__ (line 282) | def __str__(self): method get_id_params (line 288) | def get_id_params(self): method get_op_params (line 299) | def get_op_params(self): method get_model_name (line 310) | def get_model_name(self): method naive_cost_estimates (line 313) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... class LLMConvertBonded (line 352) | class LLMConvertBonded(LLMConvert): method convert (line 354) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... FILE: src/palimpzest/query/operators/critique_and_refine.py class CritiqueAndRefineConvert (line 18) | class CritiqueAndRefineConvert(LLMConvert): method __init__ (line 20) | def __init__( method __str__ (line 35) | def __str__(self): method get_id_params (line 41) | def get_id_params(self): method get_op_params (line 51) | def get_op_params(self): method naive_cost_estimates (line 61) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method convert (line 81) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... class CritiqueAndRefineFilter (line 106) | class CritiqueAndRefineFilter(LLMFilter): method __init__ (line 108) | def __init__( method __str__ (line 123) | def __str__(self): method get_id_params (line 129) | def get_id_params(self): method get_op_params (line 139) | def get_op_params(self): method naive_cost_estimates (line 149) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method filter (line 169) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... FILE: src/palimpzest/query/operators/distinct.py class DistinctOp (line 8) | class DistinctOp(PhysicalOperator): method __init__ (line 9) | def __init__(self, distinct_cols: list[str], distinct_seen: set | None... method __str__ (line 14) | def __str__(self): method get_id_params (line 19) | def get_id_params(self): method get_op_params (line 23) | def get_op_params(self): method naive_cost_estimates (line 27) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 36) | def __call__(self, candidate: DataRecord) -> DataRecordSet: FILE: src/palimpzest/query/operators/filter.py class FilterOp (line 23) | class FilterOp(PhysicalOperator, ABC): method __init__ (line 24) | def __init__(self, filter: Filter, desc: str | None = None, *args, **k... method __str__ (line 30) | def __str__(self): method get_id_params (line 35) | def get_id_params(self): method get_op_params (line 39) | def get_op_params(self): method filter (line 44) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... method _create_record_set (line 60) | def _create_record_set( method __call__ (line 107) | def __call__(self, candidate: DataRecord) -> DataRecordSet: class NonLLMFilter (line 125) | class NonLLMFilter(FilterOp): method naive_cost_estimates (line 127) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method filter (line 143) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... class LLMFilter (line 165) | class LLMFilter(FilterOp): method __init__ (line 166) | def __init__( method get_id_params (line 181) | def get_id_params(self): method get_op_params (line 192) | def get_op_params(self): method get_model_name (line 203) | def get_model_name(self): method naive_cost_estimates (line 206) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method filter (line 247) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... FILE: src/palimpzest/query/operators/join.py class Singleton (line 29) | class Singleton: method __new__ (line 30) | def __new__(cls, *args, **kw): class Locks (line 36) | class Locks(Singleton): method get_model (line 42) | def get_model(cls, model_name: str): function compute_similarity (line 48) | def compute_similarity(left_embedding: list[float], right_embedding: lis... class JoinOp (line 55) | class JoinOp(PhysicalOperator, ABC): method __init__ (line 56) | def __init__( method __str__ (line 86) | def __str__(self): method get_id_params (line 93) | def get_id_params(self): method get_op_params (line 105) | def get_op_params(self): method _compute_unmatched_records (line 118) | def _compute_unmatched_records(self) -> DataRecordSet: method naive_cost_estimates (line 172) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator... method set_finished (line 175) | def set_finished(self): class RelationalJoin (line 179) | class RelationalJoin(JoinOp): method get_model_name (line 181) | def get_model_name(self): method _process_join_candidate_pair (line 184) | def _process_join_candidate_pair(self, left_candidate, right_candidate... method naive_cost_estimates (line 228) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator... method __call__ (line 243) | def __call__(self, left_candidates: list[DataRecord], right_candidates... class LLMJoin (line 290) | class LLMJoin(JoinOp): method __init__ (line 291) | def __init__( method __str__ (line 305) | def __str__(self): method get_id_params (line 312) | def get_id_params(self): method get_op_params (line 322) | def get_op_params(self): method get_model_name (line 332) | def get_model_name(self): method _process_join_candidate_pair (line 335) | def _process_join_candidate_pair( class NestedLoopsJoin (line 395) | class NestedLoopsJoin(LLMJoin): method naive_cost_estimates (line 397) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator... method __call__ (line 439) | def __call__(self, left_candidates: list[DataRecord], right_candidates... class EmbeddingJoin (line 492) | class EmbeddingJoin(LLMJoin): method __init__ (line 495) | def __init__( method __str__ (line 529) | def __str__(self): method get_id_params (line 535) | def get_id_params(self): method get_op_params (line 545) | def get_op_params(self): method naive_cost_estimates (line 555) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator... method _compute_embeddings (line 588) | def _compute_embeddings(self, candidates: list[DataRecord], input_fiel... method _process_join_candidate_pair (line 636) | def _process_join_candidate_pair(self, left_candidate, right_candidate... method _process_join_candidate_with_sim (line 640) | def _process_join_candidate_with_sim(self, left_candidate: DataRecord,... method __call__ (line 675) | def __call__(self, left_candidates: list[DataRecord], right_candidates... FILE: src/palimpzest/query/operators/limit.py class LimitScanOp (line 8) | class LimitScanOp(PhysicalOperator): method __init__ (line 9) | def __init__(self, limit: int, *args, **kwargs): method __str__ (line 13) | def __str__(self): method get_id_params (line 18) | def get_id_params(self): method get_op_params (line 22) | def get_op_params(self): method naive_cost_estimates (line 26) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 35) | def __call__(self, candidate: DataRecord) -> DataRecordSet: FILE: src/palimpzest/query/operators/logical.py class LogicalOperator (line 16) | class LogicalOperator: method __init__ (line 38) | def __init__( method __str__ (line 57) | def __str__(self) -> str: method __eq__ (line 60) | def __eq__(self, other) -> bool: method copy (line 64) | def copy(self) -> LogicalOperator: method logical_op_name (line 70) | def logical_op_name(self) -> str: method get_unique_logical_op_id (line 74) | def get_unique_logical_op_id(self) -> str: method set_unique_logical_op_id (line 80) | def set_unique_logical_op_id(self, unique_logical_op_id: str) -> None: method get_logical_id_params (line 87) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 99) | def get_logical_op_params(self) -> dict: method get_logical_op_id (line 112) | def get_logical_op_id(self): method get_generated_fields (line 134) | def get_generated_fields(self) -> list[str]: method __hash__ (line 138) | def __hash__(self): class Aggregate (line 144) | class Aggregate(LogicalOperator): method __init__ (line 150) | def __init__( method __str__ (line 176) | def __str__(self): method get_logical_id_params (line 180) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 190) | def get_logical_op_params(self) -> dict: class BaseScan (line 201) | class BaseScan(LogicalOperator): method __init__ (line 204) | def __init__(self, datasource: dataset.Dataset, output_schema: type[Ba... method __str__ (line 208) | def __str__(self): method __eq__ (line 211) | def __eq__(self, other) -> bool: method get_logical_id_params (line 219) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 228) | def get_logical_op_params(self) -> dict: class ContextScan (line 235) | class ContextScan(LogicalOperator): method __init__ (line 238) | def __init__(self, context: context.Context, output_schema: type[BaseM... method __str__ (line 242) | def __str__(self): method __eq__ (line 245) | def __eq__(self, other) -> bool: method get_logical_id_params (line 251) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 260) | def get_logical_op_params(self) -> dict: class ConvertScan (line 267) | class ConvertScan(LogicalOperator): method __init__ (line 270) | def __init__( method __str__ (line 283) | def __str__(self): method get_logical_id_params (line 286) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 297) | def get_logical_op_params(self) -> dict: class Distinct (line 309) | class Distinct(LogicalOperator): method __init__ (line 310) | def __init__(self, distinct_cols: list[str] | None, *args, **kwargs): method __str__ (line 324) | def __str__(self): method get_logical_id_params (line 327) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 333) | def get_logical_op_params(self) -> dict: class FilteredScan (line 343) | class FilteredScan(LogicalOperator): method __init__ (line 346) | def __init__( method __str__ (line 357) | def __str__(self): method get_logical_id_params (line 360) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 370) | def get_logical_op_params(self) -> dict: class GroupByAggregate (line 381) | class GroupByAggregate(LogicalOperator): method __init__ (line 382) | def __init__( method __str__ (line 396) | def __str__(self): method get_logical_id_params (line 399) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 405) | def get_logical_op_params(self) -> dict: class JoinOp (line 415) | class JoinOp(LogicalOperator): method __init__ (line 416) | def __init__(self, condition: str, on: list[str] | None = None, how: s... method __str__ (line 423) | def __str__(self): method get_logical_id_params (line 426) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 438) | def get_logical_op_params(self) -> dict: class LimitScan (line 451) | class LimitScan(LogicalOperator): method __init__ (line 452) | def __init__(self, limit: int, *args, **kwargs): method __str__ (line 456) | def __str__(self): method get_logical_id_params (line 459) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 465) | def get_logical_op_params(self) -> dict: class Project (line 475) | class Project(LogicalOperator): method __init__ (line 476) | def __init__(self, project_cols: list[str], *args, **kwargs): method __str__ (line 480) | def __str__(self): method get_logical_id_params (line 483) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 489) | def get_logical_op_params(self) -> dict: class TopKScan (line 499) | class TopKScan(LogicalOperator): method __init__ (line 502) | def __init__( method __str__ (line 519) | def __str__(self): method get_logical_id_params (line 522) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 536) | def get_logical_op_params(self) -> dict: class ComputeOperator (line 550) | class ComputeOperator(LogicalOperator): method __init__ (line 556) | def __init__(self, context_id: str, instruction: str, *args, **kwargs): method __str__ (line 561) | def __str__(self): method get_logical_id_params (line 564) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 574) | def get_logical_op_params(self) -> dict: class SearchOperator (line 585) | class SearchOperator(LogicalOperator): method __init__ (line 591) | def __init__(self, context_id: str, search_query: str, *args, **kwargs): method __str__ (line 596) | def __str__(self): method get_logical_id_params (line 599) | def get_logical_id_params(self) -> dict: method get_logical_op_params (line 609) | def get_logical_op_params(self) -> dict: FILE: src/palimpzest/query/operators/mixture_of_agents.py class MixtureOfAgentsConvert (line 16) | class MixtureOfAgentsConvert(LLMConvert): method __init__ (line 18) | def __init__( method __str__ (line 41) | def __str__(self): method get_id_params (line 48) | def get_id_params(self): method get_op_params (line 59) | def get_op_params(self): method naive_cost_estimates (line 70) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method convert (line 106) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... class MixtureOfAgentsFilter (line 133) | class MixtureOfAgentsFilter(LLMFilter): method __init__ (line 135) | def __init__( method __str__ (line 158) | def __str__(self): method get_id_params (line 165) | def get_id_params(self): method get_op_params (line 176) | def get_op_params(self): method naive_cost_estimates (line 187) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method filter (line 221) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... FILE: src/palimpzest/query/operators/physical.py class PhysicalOperator (line 14) | class PhysicalOperator: method __init__ (line 21) | def __init__( method __str__ (line 71) | def __str__(self): method __eq__ (line 82) | def __eq__(self, other) -> bool: method copy (line 85) | def copy(self) -> PhysicalOperator: method op_name (line 88) | def op_name(self) -> str: method get_id_params (line 92) | def get_id_params(self) -> dict: method get_op_params (line 106) | def get_op_params(self) -> dict: method get_op_id (line 123) | def get_op_id(self): method get_logical_op_id (line 148) | def get_logical_op_id(self) -> str: method get_unique_logical_op_id (line 151) | def get_unique_logical_op_id(self) -> str: method get_full_op_id (line 154) | def get_full_op_id(self): method is_image_op (line 157) | def is_image_op(self) -> bool: method is_audio_op (line 161) | def is_audio_op(self) -> bool: method __hash__ (line 165) | def __hash__(self): method get_model_name (line 168) | def get_model_name(self) -> str | None: method get_input_fields (line 172) | def get_input_fields(self): method get_fields_to_generate (line 187) | def get_fields_to_generate(self, candidate: DataRecord) -> list[str]: method naive_cost_estimates (line 204) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 224) | def __call__(self, candidate: DataRecord) -> DataRecordSet: FILE: src/palimpzest/query/operators/project.py class ProjectOp (line 8) | class ProjectOp(PhysicalOperator): method __init__ (line 9) | def __init__(self, project_cols: list[str], *args, **kwargs): method __str__ (line 13) | def __str__(self): method get_id_params (line 18) | def get_id_params(self): method get_op_params (line 22) | def get_op_params(self): method naive_cost_estimates (line 26) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method __call__ (line 35) | def __call__(self, candidate: DataRecord) -> DataRecordSet: FILE: src/palimpzest/query/operators/rag.py class RAGConvert (line 18) | class RAGConvert(LLMConvert): method __init__ (line 19) | def __init__(self, embedding_model: Model, num_chunks_per_field: int, ... method __str__ (line 28) | def __str__(self): method get_id_params (line 35) | def get_id_params(self): method get_op_params (line 45) | def get_op_params(self): method naive_cost_estimates (line 55) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method chunk_text (line 84) | def chunk_text(self, text: str, chunk_size: int) -> list[str]: method compute_embedding (line 99) | def compute_embedding(self, text: str) -> tuple[list[float], Generatio... method compute_similarity (line 129) | def compute_similarity(self, query_embedding: list[float], chunk_embed... method get_chunked_candidate (line 135) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l... method convert (line 195) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... class RAGFilter (line 229) | class RAGFilter(LLMFilter): method __init__ (line 230) | def __init__(self, embedding_model: Model, num_chunks_per_field: int, ... method __str__ (line 239) | def __str__(self): method get_id_params (line 246) | def get_id_params(self): method get_op_params (line 256) | def get_op_params(self): method naive_cost_estimates (line 266) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method chunk_text (line 295) | def chunk_text(self, text: str, chunk_size: int) -> list[str]: method compute_embedding (line 310) | def compute_embedding(self, text: str) -> tuple[list[float], Generatio... method compute_similarity (line 340) | def compute_similarity(self, query_embedding: list[float], chunk_embed... method get_chunked_candidate (line 346) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l... method filter (line 402) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... FILE: src/palimpzest/query/operators/scan.py class ScanPhysicalOp (line 14) | class ScanPhysicalOp(PhysicalOperator, ABC): method __init__ (line 21) | def __init__(self, datasource: Any, *args, **kwargs): method __str__ (line 25) | def __str__(self): method get_id_params (line 30) | def get_id_params(self): method get_op_params (line 34) | def get_op_params(self): method naive_cost_estimates (line 39) | def naive_cost_estimates( method __call__ (line 60) | def __call__(self, idx: int) -> DataRecordSet: class MarshalAndScanDataOp (line 95) | class MarshalAndScanDataOp(ScanPhysicalOp): method naive_cost_estimates (line 96) | def naive_cost_estimates( class ContextScanOp (line 127) | class ContextScanOp(PhysicalOperator): method __init__ (line 132) | def __init__(self, context: context.Context, *args, **kwargs): method __str__ (line 136) | def __str__(self): method get_id_params (line 141) | def get_id_params(self): method get_op_params (line 144) | def get_op_params(self): method naive_cost_estimates (line 148) | def naive_cost_estimates( method __call__ (line 166) | def __call__(self, *args, **kwargs) -> DataRecordSet: FILE: src/palimpzest/query/operators/search.py function make_tool (line 18) | def make_tool(bound_method): class SmolAgentsSearch (line 39) | class SmolAgentsSearch(PhysicalOperator): method __init__ (line 43) | def __init__(self, context_id: str, search_query: str, *args, **kwargs): method __str__ (line 53) | def __str__(self): method get_id_params (line 59) | def get_id_params(self): method get_op_params (line 67) | def get_op_params(self): method naive_cost_estimates (line 75) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method _create_record_set (line 83) | def _create_record_set( method __call__ (line 127) | def __call__(self, candidate: DataRecord) -> Any: FILE: src/palimpzest/query/operators/split.py class SplitConvert (line 20) | class SplitConvert(LLMConvert): method __init__ (line 21) | def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000,... method __str__ (line 32) | def __str__(self): method get_id_params (line 38) | def get_id_params(self): method get_op_params (line 44) | def get_op_params(self): method naive_cost_estimates (line 48) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method get_text_chunks (line 77) | def get_text_chunks(self, text: str, num_chunks: int) -> list[str]: method get_chunked_candidate (line 93) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l... method convert (line 138) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])... class SplitFilter (line 170) | class SplitFilter(LLMFilter): method __init__ (line 171) | def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000,... method __str__ (line 182) | def __str__(self): method get_id_params (line 188) | def get_id_params(self): method get_op_params (line 194) | def get_op_params(self): method naive_cost_estimates (line 198) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method get_text_chunks (line 227) | def get_text_chunks(self, text: str, num_chunks: int) -> list[str]: method get_chunked_candidate (line 243) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l... method filter (line 288) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene... FILE: src/palimpzest/query/operators/topk.py class Singleton (line 21) | class Singleton: method __new__ (line 22) | def __new__(cls, *args, **kw): class ClipModel (line 28) | class ClipModel(Singleton): method get_model (line 33) | def get_model(cls, model_name: str): class TopKOp (line 39) | class TopKOp(PhysicalOperator): method __init__ (line 40) | def __init__( method __str__ (line 80) | def __str__(self): method get_id_params (line 85) | def get_id_params(self): method get_op_params (line 97) | def get_op_params(self): method naive_cost_estimates (line 110) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE... method default_search_func (line 122) | def default_search_func(self, index: Collection, query: list[str] | li... method _create_record_set (line 157) | def _create_record_set( method __call__ (line 214) | def __call__(self, candidate: DataRecord) -> DataRecordSet: FILE: src/palimpzest/query/optimizer/cost_model.py class BaseCostModel (line 18) | class BaseCostModel: method __init__ (line 24) | def __init__(self): method get_costed_full_op_ids (line 32) | def get_costed_full_op_ids(self) -> set[str]: method __call__ (line 38) | def __call__(self, operator: PhysicalOperator) -> PlanCost: class SampleBasedCostModel (line 46) | class SampleBasedCostModel: method __init__ (line 49) | def __init__( method get_costed_full_op_ids (line 77) | def get_costed_full_op_ids(self): method _compute_operator_stats (line 80) | def _compute_operator_stats(self, sentinel_plan_stats: SentinelPlanSta... method _compute_naive_plan_cost (line 153) | def _compute_naive_plan_cost(self, operator: PhysicalOperator, source_... method __call__ (line 210) | def __call__(self, operator: PhysicalOperator, source_op_estimates: Op... FILE: src/palimpzest/query/optimizer/optimizer.py class Optimizer (line 49) | class Optimizer: method __init__ (line 64) | def __init__( method update_cost_model (line 164) | def update_cost_model(self, cost_model: BaseCostModel): method get_physical_op_params (line 167) | def get_physical_op_params(self): method deepcopy_clean (line 176) | def deepcopy_clean(self): method update_strategy (line 195) | def update_strategy(self, optimizer_strategy: OptimizationStrategyType): method construct_group_tree (line 207) | def construct_group_tree(self, dataset: Dataset) -> tuple[int, dict[st... method convert_query_plan_to_group_tree (line 341) | def convert_query_plan_to_group_tree(self, dataset: Dataset) -> str: method heuristic_optimization (line 380) | def heuristic_optimization(self, group_id: int) -> None: method search_optimization_space (line 386) | def search_optimization_space(self, group_id: int) -> None: method optimize (line 415) | def optimize(self, dataset: Dataset) -> list[PhysicalPlan]: FILE: src/palimpzest/query/optimizer/optimizer_strategy.py class OptimizationStrategy (line 13) | class OptimizationStrategy(ABC): method get_optimal_plans (line 15) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:... class GreedyStrategy (line 20) | class GreedyStrategy(OptimizationStrategy): method _get_greedy_physical_plan (line 21) | def _get_greedy_physical_plan(self, groups: dict, group_id: int) -> Ph... method get_optimal_plans (line 58) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:... class ParetoStrategy (line 66) | class ParetoStrategy(OptimizationStrategy): method _get_candidate_pareto_physical_plans (line 67) | def _get_candidate_pareto_physical_plans(self, groups: dict, group_id:... method get_optimal_plans (line 119) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:... class SentinelStrategy (line 143) | class SentinelStrategy(OptimizationStrategy): method _get_sentinel_plan (line 144) | def _get_sentinel_plan(self, groups: dict[str, Group], group_id: int) ... method get_optimal_plans (line 172) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:... class NoOptimizationStrategy (line 179) | class NoOptimizationStrategy(GreedyStrategy): FILE: src/palimpzest/query/optimizer/optimizer_strategy_type.py class OptimizationStrategyType (line 11) | class OptimizationStrategyType(Enum): method no_transformation (line 21) | def no_transformation(self) -> bool: method is_pareto (line 27) | def is_pareto(self) -> bool: method is_not_pareto (line 33) | def is_not_pareto(self) -> bool: FILE: src/palimpzest/query/optimizer/plan.py class Plan (line 14) | class Plan(ABC): method compute_plan_id (line 16) | def compute_plan_id(self) -> str: method __eq__ (line 20) | def __eq__(self, other) -> bool: method __hash__ (line 24) | def __hash__(self) -> int: method __repr__ (line 28) | def __repr__(self) -> str: method __str__ (line 32) | def __str__(self) -> str: method __getitem__ (line 36) | def __getitem__(self, slice) -> tuple: method __iter__ (line 40) | def __iter__(self) -> iter: method __len__ (line 44) | def __len__(self) -> int: class PhysicalPlan (line 47) | class PhysicalPlan(Plan): method __init__ (line 48) | def __init__(self, operator: PhysicalOperator, subplans: list[Physical... method compute_plan_id (line 70) | def compute_plan_id(self) -> str: method get_est_total_outputs (line 80) | def get_est_total_outputs(self, num_samples: int | None = None, curren... method _compute_next_unique_full_op_map (line 137) | def _compute_next_unique_full_op_map(self, next_map: dict[str, str | N... method get_next_unique_full_op_and_id (line 171) | def get_next_unique_full_op_and_id(self, topo_idx: int, operator: Phys... method get_next_unique_full_op_id (line 176) | def get_next_unique_full_op_id(self, topo_idx: int, operator: Physical... method _compute_upstream_unique_full_op_ids_map (line 182) | def _compute_upstream_unique_full_op_ids_map(self, upstream_map: dict[... method get_upstream_unique_full_op_ids (line 206) | def get_upstream_unique_full_op_ids(self, unique_full_op_id: str) -> l... method _compute_source_unique_full_op_ids_map (line 210) | def _compute_source_unique_full_op_ids_map(self, source_map: dict[str,... method get_source_unique_full_op_ids (line 234) | def get_source_unique_full_op_ids(self, topo_idx: int, operator: Physi... method __eq__ (line 239) | def __eq__(self, other): method __hash__ (line 242) | def __hash__(self): method __repr__ (line 245) | def __repr__(self) -> str: method _get_str (line 248) | def _get_str(self, idx: int = 0, indent: int = 0) -> str: method __str__ (line 256) | def __str__(self): method __getitem__ (line 259) | def __getitem__(self, slice): method __iter__ (line 263) | def __iter__(self): method __len__ (line 268) | def __len__(self): method _from_ops (line 272) | def _from_ops(cls, ops: list[PhysicalOperator], plan_cost: PlanCost | ... class SentinelPlan (line 290) | class SentinelPlan(Plan): method __init__ (line 291) | def __init__(self, operator_set: list[PhysicalOperator], subplans: lis... method compute_plan_id (line 311) | def compute_plan_id(self) -> str: method __eq__ (line 321) | def __eq__(self, other): method __hash__ (line 324) | def __hash__(self): method __repr__ (line 327) | def __repr__(self) -> str: method _get_str (line 330) | def _get_str(self, idx: int = 0, indent: int = 0) -> str: method __str__ (line 340) | def __str__(self): method __getitem__ (line 343) | def __getitem__(self, slice): method __iter__ (line 347) | def __iter__(self): method __len__ (line 352) | def __len__(self): method _compute_next_unique_logical_op_id_map (line 355) | def _compute_next_unique_logical_op_id_map(self, next_map: dict[str, s... method get_next_unique_logical_op_id (line 389) | def get_next_unique_logical_op_id(self, unique_logical_op_id: str) -> ... method _compute_root_dataset_ids_map (line 393) | def _compute_root_dataset_ids_map(self, root_dataset_ids_map: dict[str... method get_root_dataset_ids (line 421) | def get_root_dataset_ids(self, unique_logical_op_id: str) -> list[str]: method _compute_source_unique_logical_op_ids_map (line 425) | def _compute_source_unique_logical_op_ids_map(self, source_map: dict[s... method get_source_unique_logical_op_ids (line 449) | def get_source_unique_logical_op_ids(self, unique_logical_op_id: str) ... FILE: src/palimpzest/query/optimizer/primitives.py class Expression (line 12) | class Expression: method __init__ (line 19) | def __init__( method __eq__ (line 49) | def __eq__(self, other): method __str__ (line 52) | def __str__(self): method __hash__ (line 61) | def __hash__(self): method _compute_expr_id (line 67) | def _compute_expr_id(self) -> int: method add_applied_rule (line 70) | def add_applied_rule(self, rule: type[rules.Rule]): method set_group_id (line 73) | def set_group_id(self, group_id: int) -> None: class LogicalExpression (line 77) | class LogicalExpression(Expression): class PhysicalExpression (line 81) | class PhysicalExpression(Expression): method from_op_and_logical_expr (line 84) | def from_op_and_logical_expr(cls, op: PhysicalOperator, logical_expres... class Group (line 96) | class Group: method __init__ (line 103) | def __init__(self, logical_expressions: list[LogicalExpression], field... method set_explored (line 119) | def set_explored(self): method _compute_group_id (line 122) | def _compute_group_id(self) -> int: FILE: src/palimpzest/query/optimizer/rules.py class Rule (line 62) | class Rule: method get_rule_id (line 68) | def get_rule_id(cls): method matches_pattern (line 72) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 76) | def substitute(cls, logical_expression: LogicalExpression, **kwargs: d... class TransformationRule (line 80) | class TransformationRule(Rule): method is_exploration_rule (line 88) | def is_exploration_rule(cls) -> bool: method substitute (line 93) | def substitute( class ReorderConverts (line 108) | class ReorderConverts(TransformationRule): method is_exploration_rule (line 114) | def is_exploration_rule(cls) -> bool: method matches_pattern (line 118) | def matches_pattern(cls, logical_expression: Expression) -> bool: method substitute (line 124) | def substitute( class PushDownFilter (line 245) | class PushDownFilter(TransformationRule): method matches_pattern (line 252) | def matches_pattern(cls, logical_expression: Expression) -> bool: method substitute (line 258) | def substitute( class ImplementationRule (line 375) | class ImplementationRule(Rule): method _get_image_fields (line 381) | def _get_image_fields(cls, logical_expression: LogicalExpression) -> s... method _get_list_image_fields (line 390) | def _get_list_image_fields(cls, logical_expression: LogicalExpression)... method _get_audio_fields (line 399) | def _get_audio_fields(cls, logical_expression: LogicalExpression) -> s... method _get_list_audio_fields (line 408) | def _get_list_audio_fields(cls, logical_expression: LogicalExpression)... method _is_image_only_operation (line 417) | def _is_image_only_operation(cls, logical_expression: LogicalExpressio... method _is_image_operation (line 426) | def _is_image_operation(cls, logical_expression: LogicalExpression) ->... method _is_audio_only_operation (line 435) | def _is_audio_only_operation(cls, logical_expression: LogicalExpressio... method _is_audio_operation (line 444) | def _is_audio_operation(cls, logical_expression: LogicalExpression) ->... method _is_text_only_operation (line 453) | def _is_text_only_operation(cls, logical_expression: LogicalExpression... method _is_text_operation (line 462) | def _is_text_operation(cls, logical_expression: LogicalExpression) -> ... method _is_text_image_multimodal_operation (line 472) | def _is_text_image_multimodal_operation(cls, logical_expression: Logic... method _is_text_audio_multimodal_operation (line 477) | def _is_text_audio_multimodal_operation(cls, logical_expression: Logic... method _model_matches_input (line 482) | def _model_matches_input(cls, model: Model, logical_expression: Logica... method _embedding_model_matches_input (line 525) | def _embedding_model_matches_input(cls, model: Model, logical_expressi... method _get_fixed_op_kwargs (line 534) | def _get_fixed_op_kwargs(cls, logical_expression: LogicalExpression, r... method _perform_substitution (line 553) | def _perform_substitution( class NonLLMConvertRule (line 606) | class NonLLMConvertRule(ImplementationRule): method matches_pattern (line 612) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 618) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class LLMConvertBondedRule (line 623) | class LLMConvertBondedRule(ImplementationRule): method matches_pattern (line 629) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 635) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class RAGRule (line 655) | class RAGRule(ImplementationRule): method matches_pattern (line 664) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 672) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class MixtureOfAgentsRule (line 711) | class MixtureOfAgentsRule(ImplementationRule): method matches_pattern (line 720) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 728) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class CritiqueAndRefineRule (line 752) | class CritiqueAndRefineRule(ImplementationRule): method matches_pattern (line 758) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 766) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class SplitRule (line 801) | class SplitRule(ImplementationRule): method matches_pattern (line 809) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 817) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class TopKRule (line 839) | class TopKRule(ImplementationRule): method matches_pattern (line 846) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 852) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class NonLLMFilterRule (line 861) | class NonLLMFilterRule(ImplementationRule): method matches_pattern (line 867) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 874) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class LLMFilterRule (line 879) | class LLMFilterRule(ImplementationRule): method matches_pattern (line 885) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 892) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class RelationalJoinRule (line 912) | class RelationalJoinRule(ImplementationRule): method matches_pattern (line 918) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 924) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class NestedLoopsJoinRule (line 929) | class NestedLoopsJoinRule(ImplementationRule): method matches_pattern (line 935) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 941) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class EmbeddingJoinRule (line 963) | class EmbeddingJoinRule(ImplementationRule): method matches_pattern (line 969) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 975) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class SemanticAggregateRule (line 1001) | class SemanticAggregateRule(ImplementationRule): method matches_pattern (line 1007) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 1013) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class AggregateRule (line 1033) | class AggregateRule(ImplementationRule): method matches_pattern (line 1039) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 1045) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class AddContextsBeforeComputeRule (line 1067) | class AddContextsBeforeComputeRule(ImplementationRule): method matches_pattern (line 1077) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 1083) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... class BasicSubstitutionRule (line 1111) | class BasicSubstitutionRule(ImplementationRule): method matches_pattern (line 1129) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool: method substitute (line 1136) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k... FILE: src/palimpzest/query/optimizer/tasks.py class Task (line 17) | class Task: method perform (line 24) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ... class OptimizeGroup (line 33) | class OptimizeGroup(Task): method __init__ (line 44) | def __init__(self, group_id: int): method perform (line 47) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ... class ExploreGroup (line 79) | class ExploreGroup(Task): method __init__ (line 84) | def __init__(self, group_id: int): method perform (line 87) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ... class OptimizeLogicalExpression (line 119) | class OptimizeLogicalExpression(Task): method __init__ (line 127) | def __init__(self, logical_expression: Expression, exploring: bool = F... method perform (line 131) | def perform( class ApplyRule (line 168) | class ApplyRule(Task): method __init__ (line 188) | def __init__(self, rule: type[Rule], logical_expression: Expression, e... method perform (line 193) | def perform( class OptimizePhysicalExpression (line 277) | class OptimizePhysicalExpression(Task): method __init__ (line 287) | def __init__(self, physical_expression: Expression, exploring: bool = ... method update_best_physical_expression (line 291) | def update_best_physical_expression(self, group: Group, policy: Policy... method _is_dominated (line 324) | def _is_dominated(self, plan_cost: PlanCost, other_plan_cost: PlanCost... method _is_pareto_optimal (line 363) | def _is_pareto_optimal(self, expr_plan_cost: PlanCost, pareto_optimal_... method update_pareto_optimal_physical_expressions (line 376) | def update_pareto_optimal_physical_expressions(self, group: Group, pol... method perform (line 423) | def perform( FILE: src/palimpzest/query/processor/config.py class QueryProcessorConfig (line 10) | class QueryProcessorConfig(BaseModel): method to_dict (line 57) | def to_dict(self) -> dict: method copy (line 61) | def copy(self) -> QueryProcessorConfig: FILE: src/palimpzest/query/processor/query_processor.py class QueryProcessor (line 18) | class QueryProcessor: method __init__ (line 25) | def __init__( method execution_id (line 69) | def execution_id(self) -> str: method _create_sentinel_plan (line 80) | def _create_sentinel_plan(self, train_dataset: dict[str, Dataset] | No... method _execute_best_plan (line 100) | def _execute_best_plan(self, dataset: Dataset, optimizer: Optimizer) -... method execute (line 111) | def execute(self) -> DataRecordCollection: FILE: src/palimpzest/query/processor/query_processor_factory.py class QueryProcessorFactory (line 23) | class QueryProcessorFactory: method _convert_to_enum (line 26) | def _convert_to_enum(cls, enum_type: type[Enum], value: str) -> Enum: method _normalize_strategies (line 34) | def _normalize_strategies(cls, config: QueryProcessorConfig): method _normalize_models (line 60) | def _normalize_models(cls, config: QueryProcessorConfig) -> QueryProce... method _config_validation_and_normalization (line 109) | def _config_validation_and_normalization(cls, config: QueryProcessorCo... method _create_optimizer (line 164) | def _create_optimizer(cls, config: QueryProcessorConfig) -> Optimizer: method _create_execution_strategy (line 168) | def _create_execution_strategy(cls, dataset: Dataset, config: QueryPro... method _create_sentinel_execution_strategy (line 187) | def _create_sentinel_execution_strategy(cls, config: QueryProcessorCon... method create_processor (line 198) | def create_processor( method create_and_run_processor (line 238) | def create_and_run_processor( FILE: src/palimpzest/schemabuilder/schema_builder.py class SchemaBuilder (line 21) | class SchemaBuilder: method from_file (line 24) | def from_file(cls, method from_csv (line 98) | def from_csv( method from_jsonld (line 132) | def from_jsonld( method from_json (line 176) | def from_json( method from_yml (line 201) | def from_yml( FILE: src/palimpzest/tools/allenpdf.py function process_papermage_pdf (line 34) | def process_papermage_pdf(pdf_bytes_docs: list[bytes]): function main (line 59) | def main(): FILE: src/palimpzest/tools/pdfparser.py function get_md5 (line 17) | def get_md5(file_bytes: bytes) -> str: function cosmos_parquet_to_json (line 26) | def cosmos_parquet_to_json(path): function cosmos_json_txt (line 99) | def cosmos_json_txt(cosmos_json): function cosmos_client (line 111) | def cosmos_client(name: str, data: BinaryIO, output_dir: str, delay=10): function get_text_from_pdf (line 191) | def get_text_from_pdf(filename, pdf_bytes, pdfprocessor="pypdf", enable_... FILE: src/palimpzest/tools/skema_tools.py function equations_to_latex (line 10) | def equations_to_latex(image_content): function equations_to_latex_base64 (line 19) | def equations_to_latex_base64(image_content): FILE: src/palimpzest/utils/env_helpers.py function load_env (line 5) | def load_env(): FILE: src/palimpzest/utils/hash_helpers.py function hash_for_id (line 7) | def hash_for_id(id_str: str, max_chars: int = MAX_ID_CHARS) -> str: function hash_for_serialized_dict (line 11) | def hash_for_serialized_dict(dict_obj: dict) -> str: FILE: src/palimpzest/utils/model_helpers.py function get_models (line 8) | def get_models(include_embedding: bool = False, use_vertex: bool = False... function get_optimal_models (line 78) | def get_optimal_models(policy: Policy, include_embedding: bool = False, ... function use_reasoning_prompt (line 194) | def use_reasoning_prompt(reasoning_effort: str) -> bool: function resolve_reasoning_effort (line 202) | def resolve_reasoning_effort(model: Model, reasoning_effort: str) -> str... FILE: src/palimpzest/utils/model_info_helpers.py function _normalize_model_name (line 192) | def _normalize_model_name(name: str) -> str: function _extract_version_info (line 197) | def _extract_version_info(name: str) -> tuple[str, str | None, str | None]: function fuzzy_match_score (line 233) | def fuzzy_match_score(model_id: str, scores_dict: dict[str, float]) -> f... function _extract_model_size (line 297) | def _extract_model_size(model_id: str) -> str | None: function derive_model_flags (line 310) | def derive_model_flags(model_id: str) -> dict[str, bool]: function _estimate_tps_from_size (line 373) | def _estimate_tps_from_size(model_id: str) -> float | None: function predict_local_model_metrics (line 403) | def predict_local_model_metrics(model_id: str) -> dict[str, Any]: class ModelMetricsManager (line 458) | class ModelMetricsManager: method __new__ (line 464) | def __new__(cls, *args, **kwargs): method __init__ (line 469) | def __init__(self): method _load_data (line 476) | def _load_data(self): method get_model_metrics (line 485) | def get_model_metrics(self, model_name) -> dict[str, Any]: method refresh_data (line 489) | def refresh_data(self) -> None: FILE: src/palimpzest/utils/progress.py class ProgressStats (line 32) | class ProgressStats: function get_memory_usage (line 42) | def get_memory_usage() -> float: class ProgressManager (line 53) | class ProgressManager(ABC): method __init__ (line 56) | def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int... method get_task_total (line 117) | def get_task_total(self, unique_full_op_id: str) -> int: method get_task_description (line 122) | def get_task_description(self, unique_full_op_id: str) -> str: method add_task (line 128) | def add_task(self, unique_full_op_id: str, op_str: str, total: int): method start (line 133) | def start(self): method incr (line 138) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output... method finish (line 152) | def finish(self): class MockProgressManager (line 157) | class MockProgressManager(ProgressManager): method __init__ (line 160) | def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int... method add_task (line 163) | def add_task(self, unique_full_op_id: str, op_str: str, total: int): method start (line 166) | def start(self): method incr (line 169) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output... method finish (line 172) | def finish(self): method incr_overall_progress_cost (line 175) | def incr_overall_progress_cost(self, cost_delta: float): class PZProgressManager (line 178) | class PZProgressManager(ProgressManager): method __init__ (line 181) | def __init__(self, plan: PhysicalPlan, num_samples: int | None = None): method add_task (line 185) | def add_task(self, unique_full_op_id: str, op_str: str, total: int): method start (line 203) | def start(self): method incr (line 213) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output... method finish (line 265) | def finish(self): method update_stats (line 278) | def update_stats(self, unique_full_op_id: str, **kwargs): class PZSentinelProgressManager (line 288) | class PZSentinelProgressManager(ProgressManager): method __init__ (line 289) | def __init__(self, plan: SentinelPlan, sample_budget: int | None, samp... method _is_llm_op (line 358) | def _is_llm_op(self, physical_op: PhysicalOperator) -> bool: method get_task_description (line 365) | def get_task_description(self, unique_logical_op_id: str) -> str: method add_task (line 370) | def add_task(self, unique_logical_op_id: str, op_str: str, total: int): method start (line 388) | def start(self): method incr_overall_progress_cost (line 398) | def incr_overall_progress_cost(self, cost_delta: float): method incr (line 411) | def incr(self, unique_logical_op_id: str, num_samples: int, display_te... method finish (line 455) | def finish(self): method update_stats (line 468) | def update_stats(self, unique_logical_op_id: str, **kwargs): function create_progress_manager (line 478) | def create_progress_manager( FILE: src/palimpzest/utils/udfs.py function url_to_file (line 14) | def url_to_file(candidate: dict): function file_to_xls (line 28) | def file_to_xls(candidate: dict): function xls_to_tables (line 34) | def xls_to_tables(candidate: dict): FILE: src/palimpzest/validator/validator.py class Validator (line 25) | class Validator: method __init__ (line 33) | def __init__(self, model: Model = Model.o4_MINI): method map_score_fn (line 38) | def map_score_fn(self, fields: list[str], input_record: dict, output: ... method flat_map_score_fn (line 41) | def flat_map_score_fn(self, fields: list[str], input_record: dict, out... method filter_score_fn (line 44) | def filter_score_fn(self, filter_str: str, input_record: dict, output:... method join_score_fn (line 47) | def join_score_fn(self, condition: str, left_input_record: dict, right... method topk_score_fn (line 50) | def topk_score_fn(self, fields: list[str], input_record: dict, output:... method _get_gen_stats_from_completion (line 53) | def _get_gen_stats_from_completion(self, completion, start_time: float... method _default_map_score_fn (line 75) | def _default_map_score_fn(self, op: LLMConvert, fields: list[str], inp... method _default_flat_map_score_fn (line 111) | def _default_flat_map_score_fn(self, op: LLMConvert, fields: list[str]... method _default_filter_score_fn (line 150) | def _default_filter_score_fn(self, op: LLMFilter, filter_str: str, inp... method _default_join_score_fn (line 190) | def _default_join_score_fn(self, op: JoinOp, condition: str, left_inpu... method _default_topk_score_fn (line 227) | def _default_topk_score_fn(self, op: TopKOp, fields: list[str], input_... method _score_map (line 267) | def _score_map(self, op: LLMConvert, fields: list[str], input_record: ... method _score_flat_map (line 276) | def _score_flat_map(self, op: LLMConvert, fields: list[str], input_rec... method _score_filter (line 285) | def _score_filter(self, op: LLMFilter, filter_str: str, input_record: ... method _score_join (line 294) | def _score_join(self, op: JoinOp, condition: str, left_input_record: D... method _score_topk (line 303) | def _score_topk(self, op: TopKOp, fields: list[str], input_record: Dat... FILE: tests/pytest/conftest.py function dataset (line 26) | def dataset(request, enron_eval_tiny, real_estate_eval_tiny): function workload (line 36) | def workload( function policy (line 56) | def policy(request): function physical_plan (line 68) | def physical_plan( function sentinel_plan (line 92) | def sentinel_plan( function execution_data (line 105) | def execution_data( function expected_records (line 120) | def expected_records( function champion_outputs (line 146) | def champion_outputs( function expected_qualities (line 164) | def expected_qualities( function side_effect (line 184) | def side_effect( function operator_to_stats (line 203) | def operator_to_stats( function expected_plan (line 232) | def expected_plan( FILE: tests/pytest/fixtures/champion_outputs.py function scan_convert_filter_champion_outputs (line 10) | def scan_convert_filter_champion_outputs(scan_convert_filter_sentinel_pl... function scan_convert_filter_empty_champion_outputs (line 47) | def scan_convert_filter_empty_champion_outputs(scan_convert_filter_senti... function scan_convert_filter_varied_champion_outputs (line 84) | def scan_convert_filter_varied_champion_outputs(scan_convert_filter_sent... function scan_multi_convert_multi_filter_champion_outputs (line 121) | def scan_multi_convert_multi_filter_champion_outputs(scan_multi_convert_... FILE: tests/pytest/fixtures/datasets.py class RealEstateListingDataset (line 16) | class RealEstateListingDataset(IterDataset): method __init__ (line 17) | def __init__(self, listings_dir): method __len__ (line 28) | def __len__(self): method __getitem__ (line 31) | def __getitem__(self, idx: int): class CostModelTestDataset (line 49) | class CostModelTestDataset(IterDataset): method __init__ (line 50) | def __init__(self): method __len__ (line 54) | def __len__(self): method __getitem__ (line 57) | def __getitem__(self, idx: int): function project_root (line 66) | def project_root() -> Path: function enron_eval_tiny_data_path (line 71) | def enron_eval_tiny_data_path(project_root) -> str: function real_estate_eval_tiny_data_path (line 76) | def real_estate_eval_tiny_data_path(project_root) -> str: function enron_eval_tiny (line 82) | def enron_eval_tiny(enron_eval_tiny_data_path): function real_estate_eval_tiny (line 87) | def real_estate_eval_tiny(real_estate_eval_tiny_data_path): function cost_model_test_dataset (line 92) | def cost_model_test_dataset(): FILE: tests/pytest/fixtures/execution_data.py function scan_convert_filter_execution_data (line 11) | def scan_convert_filter_execution_data(scan_convert_filter_sentinel_plan... function scan_convert_filter_varied_execution_data (line 114) | def scan_convert_filter_varied_execution_data(scan_convert_filter_sentin... function scan_multi_convert_multi_filter_execution_data (line 225) | def scan_multi_convert_multi_filter_execution_data(scan_multi_convert_mu... FILE: tests/pytest/fixtures/expected_physical_plans.py function get_three_converts_plan (line 17) | def get_three_converts_plan(three_converts_workload, enron_eval_tiny, em... function three_converts_min_cost_expected_plan (line 52) | def three_converts_min_cost_expected_plan(three_converts_workload, enron... function three_converts_max_quality_expected_plan (line 71) | def three_converts_max_quality_expected_plan(three_converts_workload, en... function three_converts_min_cost_at_fixed_quality_expected_plan (line 90) | def three_converts_min_cost_at_fixed_quality_expected_plan(three_convert... function three_converts_max_quality_at_fixed_cost_expected_plan (line 109) | def three_converts_max_quality_at_fixed_cost_expected_plan(three_convert... function get_one_filter_one_convert_plan (line 128) | def get_one_filter_one_convert_plan(one_filter_one_convert_workload, enr... function one_filter_one_convert_min_cost_expected_plan (line 163) | def one_filter_one_convert_min_cost_expected_plan(one_filter_one_convert... function get_two_converts_two_filters_plan (line 186) | def get_two_converts_two_filters_plan(two_converts_two_filters_workload,... function two_converts_two_filters_min_cost_expected_plan (line 228) | def two_converts_two_filters_min_cost_expected_plan(two_converts_two_fil... function two_converts_two_filters_max_quality_expected_plan (line 257) | def two_converts_two_filters_max_quality_expected_plan(two_converts_two_... function two_converts_two_filters_min_cost_at_fixed_quality_expected_plan (line 286) | def two_converts_two_filters_min_cost_at_fixed_quality_expected_plan(two... function two_converts_two_filters_max_quality_at_fixed_cost_expected_plan (line 315) | def two_converts_two_filters_max_quality_at_fixed_cost_expected_plan(two... FILE: tests/pytest/fixtures/expected_qualities.py function scan_convert_filter_qualities (line 8) | def scan_convert_filter_qualities(scan_convert_filter_execution_data): function scan_convert_filter_empty_qualities (line 19) | def scan_convert_filter_empty_qualities(scan_convert_filter_execution_da... function scan_convert_filter_varied_qualities (line 44) | def scan_convert_filter_varied_qualities(scan_convert_filter_varied_exec... function scan_convert_filter_varied_override_qualities (line 76) | def scan_convert_filter_varied_override_qualities(scan_convert_filter_va... function scan_multi_convert_multi_filter_qualities (line 135) | def scan_multi_convert_multi_filter_qualities(scan_multi_convert_multi_f... FILE: tests/pytest/fixtures/expected_records.py function enron_all_expected_records (line 12) | def enron_all_expected_records(enron_eval_tiny_data_path): function enron_filter_expected_records (line 25) | def enron_filter_expected_records(enron_all_expected_records): function real_estate_all_expected_records (line 35) | def real_estate_all_expected_records(real_estate_eval_tiny_data_path, im... function real_estate_one_to_many_expected_records (line 58) | def real_estate_one_to_many_expected_records(real_estate_eval_tiny_data_... function scan_convert_filter_expected_outputs (line 84) | def scan_convert_filter_expected_outputs(foobar_schema): function scan_convert_filter_empty_expected_outputs (line 102) | def scan_convert_filter_empty_expected_outputs(): function scan_convert_filter_varied_expected_outputs (line 106) | def scan_convert_filter_varied_expected_outputs(foobar_schema): function scan_multi_convert_multi_filter_expected_outputs (line 127) | def scan_multi_convert_multi_filter_expected_outputs(foobar_schema, baz_... FILE: tests/pytest/fixtures/models.py function embedding_text_only_model (line 9) | def embedding_text_only_model(): FILE: tests/pytest/fixtures/operator_to_stats.py function get_three_converts_logical_and_full_op_ids (line 15) | def get_three_converts_logical_and_full_op_ids(three_converts_workload, ... function three_converts_min_cost_operator_to_stats (line 71) | def three_converts_min_cost_operator_to_stats(three_converts_workload, e... function three_converts_max_quality_operator_to_stats (line 100) | def three_converts_max_quality_operator_to_stats(three_converts_workload... function three_converts_min_cost_at_fixed_quality_operator_to_stats (line 129) | def three_converts_min_cost_at_fixed_quality_operator_to_stats(three_con... function three_converts_max_quality_at_fixed_cost_operator_to_stats (line 158) | def three_converts_max_quality_at_fixed_cost_operator_to_stats(three_con... function get_one_filter_one_convert_logical_and_full_op_ids (line 190) | def get_one_filter_one_convert_logical_and_full_op_ids(one_filter_one_co... function one_filter_one_convert_min_cost_operator_to_stats (line 240) | def one_filter_one_convert_min_cost_operator_to_stats(one_filter_one_con... function get_two_converts_two_filters_logical_and_full_op_ids (line 264) | def get_two_converts_two_filters_logical_and_full_op_ids(two_converts_tw... function two_converts_two_filters_min_cost_operator_to_stats (line 332) | def two_converts_two_filters_min_cost_operator_to_stats(two_converts_two... function two_converts_two_filters_max_quality_operator_to_stats (line 366) | def two_converts_two_filters_max_quality_operator_to_stats(two_converts_... function two_converts_two_filters_min_cost_at_fixed_quality_operator_to_stats (line 400) | def two_converts_two_filters_min_cost_at_fixed_quality_operator_to_stats... function two_converts_two_filters_max_quality_at_fixed_cost_operator_to_stats (line 434) | def two_converts_two_filters_max_quality_at_fixed_cost_operator_to_stats... FILE: tests/pytest/fixtures/physical_plans.py function scan_only_plan (line 16) | def scan_only_plan(enron_eval_tiny): function non_llm_filter_plan (line 23) | def non_llm_filter_plan(enron_eval_tiny): function llm_filter_plan (line 36) | def llm_filter_plan(enron_eval_tiny): function bonded_llm_convert_plan (line 51) | def bonded_llm_convert_plan(email_schema, enron_eval_tiny): function rag_convert_plan (line 64) | def rag_convert_plan(email_schema, enron_eval_tiny, embedding_text_only_... function image_convert_plan (line 80) | def image_convert_plan(real_estate_listing_files_schema, image_real_esta... function one_to_many_convert_plan (line 93) | def one_to_many_convert_plan(real_estate_listing_files_schema, room_real... function scan_convert_filter_sentinel_plan (line 107) | def scan_convert_filter_sentinel_plan(foobar_schema): function scan_multi_convert_multi_filter_sentinel_plan (line 134) | def scan_multi_convert_multi_filter_sentinel_plan(foobar_schema, baz_sch... FILE: tests/pytest/fixtures/schemas.py function email_schema (line 11) | def email_schema(): function real_estate_listing_files_schema (line 21) | def real_estate_listing_files_schema(): function text_real_estate_listing_schema (line 32) | def text_real_estate_listing_schema(real_estate_listing_files_schema): function image_real_estate_listing_schema (line 42) | def image_real_estate_listing_schema(real_estate_listing_files_schema): function room_real_estate_listing_schema (line 57) | def room_real_estate_listing_schema(real_estate_listing_files_schema): function case_data_schema (line 69) | def case_data_schema(): function foobar_schema (line 100) | def foobar_schema(): function baz_schema (line 108) | def baz_schema(): FILE: tests/pytest/fixtures/side_effects.py function enron_filter (line 8) | def enron_filter(): function enron_convert (line 20) | def enron_convert(email_schema): function real_estate_convert (line 52) | def real_estate_convert(image_real_estate_listing_schema): function real_estate_one_to_many_convert (line 70) | def real_estate_one_to_many_convert(room_real_estate_listing_schema): FILE: tests/pytest/fixtures/workloads.py function within_two_miles_of_mit (line 5) | def within_two_miles_of_mit(record): function in_price_range (line 26) | def in_price_range(record): function enron_workload (line 39) | def enron_workload(enron_eval_tiny, email_schema): function small_real_estate_workload (line 52) | def small_real_estate_workload( function real_estate_workload (line 68) | def real_estate_workload( function three_converts_workload (line 92) | def three_converts_workload(enron_eval_tiny, email_schema, foobar_schema... function one_filter_one_convert_workload (line 102) | def one_filter_one_convert_workload(enron_eval_tiny, email_schema): function two_converts_two_filters_workload (line 111) | def two_converts_two_filters_workload(enron_eval_tiny, email_schema, foo... FILE: tests/pytest/test_aggregate.py class TextInputSchema (line 21) | class TextInputSchema(BaseModel): class ImageInputSchema (line 25) | class ImageInputSchema(BaseModel): class AudioInputSchema (line 29) | class AudioInputSchema(BaseModel): class OutputSchema (line 38) | class OutputSchema(BaseModel): function create_input_record (line 41) | def create_input_record(input_schema: type[BaseModel], idx: int) -> Data... function mock_generator_call (line 59) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou... function test_aggregate (line 77) | def test_aggregate(mocker, input_schema, physical_op_class): FILE: tests/pytest/test_convert.py function test_convert (line 27) | def test_convert(mocker, convert_op, side_effect, email_schema, enron_ev... FILE: tests/pytest/test_dataset.py function sample_df (line 11) | def sample_df(): function test_dataset_initialization (line 19) | def test_dataset_initialization(sample_df): function test_dataset_filter (line 25) | def test_dataset_filter(sample_df): function test_dataset_add_columns (line 39) | def test_dataset_add_columns(sample_df): FILE: tests/pytest/test_distinct.py function sample_df (line 20) | def sample_df(): function test_distinct (line 29) | def test_distinct(sample_df, execution_strategy): function test_dataset_with_distinct_cols (line 39) | def test_dataset_with_distinct_cols(sample_df, execution_strategy): function test_dataset_with_distinct_cols_and_limit (line 49) | def test_dataset_with_distinct_cols_and_limit(sample_df, execution_strat... function test_dataset_with_distinct_cols_and_filter (line 59) | def test_dataset_with_distinct_cols_and_filter(sample_df, execution_stra... FILE: tests/pytest/test_dynamic_models.py function input_schema (line 48) | def input_schema(): function output_schema (line 56) | def output_schema(): function sample_record (line 64) | def sample_record(input_schema): function mock_litellm_response (line 70) | def mock_litellm_response(): class TestModelInstantiation (line 85) | class TestModelInstantiation: method test_known_model_instantiation (line 88) | def test_known_model_instantiation(self): method test_model_instantiation_with_string (line 94) | def test_model_instantiation_with_string(self): method test_unknown_model_raises_error (line 101) | def test_unknown_model_raises_error(self): method test_model_properties_from_specs (line 106) | def test_model_properties_from_specs(self): method test_model_provider_property (line 115) | def test_model_provider_property(self): method test_model_api_base_parameter (line 123) | def test_model_api_base_parameter(self): class TestModelRegistry (line 135) | class TestModelRegistry: method test_models_registered_on_creation (line 138) | def test_models_registered_on_creation(self): method test_get_all_models_returns_list (line 148) | def test_get_all_models_returns_list(self): method test_registry_contains_expected_models (line 154) | def test_registry_contains_expected_models(self): class TestModelEqualityAndHashing (line 172) | class TestModelEqualityAndHashing: method test_model_equality_same_instance (line 175) | def test_model_equality_same_instance(self): method test_model_equality_same_value (line 180) | def test_model_equality_same_value(self): method test_model_equality_with_string (line 186) | def test_model_equality_with_string(self): method test_model_inequality (line 191) | def test_model_inequality(self): method test_model_hash_consistency (line 195) | def test_model_hash_consistency(self): method test_model_usable_in_set (line 201) | def test_model_usable_in_set(self): method test_model_usable_as_dict_key (line 206) | def test_model_usable_as_dict_key(self): method test_model_str_repr (line 211) | def test_model_str_repr(self): method test_model_lt_comparison (line 217) | def test_model_lt_comparison(self): class TestModelHelperFunctions (line 229) | class TestModelHelperFunctions: method test_get_models_with_openai_key (line 232) | def test_get_models_with_openai_key(self): method test_get_models_excludes_embedding_by_default (line 239) | def test_get_models_excludes_embedding_by_default(self): method test_get_models_includes_embedding_when_requested (line 246) | def test_get_models_includes_embedding_when_requested(self): method test_get_models_empty_without_keys (line 253) | def test_get_models_empty_without_keys(self): method test_get_optimal_models_returns_top_models (line 264) | def test_get_optimal_models_returns_top_models(self): method test_get_optimal_models_respects_policy (line 270) | def test_get_optimal_models_respects_policy(self): method test_get_optimal_models_never_returns_empty_with_available_models (line 283) | def test_get_optimal_models_never_returns_empty_with_available_models(... method test_get_optimal_models_fallback_returns_best_by_primary_metric (line 293) | def test_get_optimal_models_fallback_returns_best_by_primary_metric(se... method test_get_optimal_models_fallback_with_time_policy (line 308) | def test_get_optimal_models_fallback_with_time_policy(self): class TestGeneratorIntegration (line 323) | class TestGeneratorIntegration: method test_generator_uses_model_value (line 327) | def test_generator_uses_model_value( method test_generator_with_different_providers (line 354) | def test_generator_with_different_providers( class TestQueryProcessorIntegration (line 384) | class TestQueryProcessorIntegration: method test_factory_accepts_model_list (line 388) | def test_factory_accepts_model_list(self, mock_processor_cls): method test_factory_auto_selects_models_when_none_provided (line 409) | def test_factory_auto_selects_models_when_none_provided(self): class TestEndToEndIntegration (line 439) | class TestEndToEndIntegration: method test_simple_sem_map_pipeline (line 446) | def test_simple_sem_map_pipeline(self): method test_pipeline_with_filter (line 490) | def test_pipeline_with_filter(self): method test_pipeline_with_auto_model_selection (line 526) | def test_pipeline_with_auto_model_selection(self): class TestVLLMModelSupport (line 555) | class TestVLLMModelSupport: method test_vllm_model_creation_with_api_base (line 560) | def test_vllm_model_creation_with_api_base(self): method test_vllm_model_stores_extra_kwargs (line 566) | def test_vllm_model_stores_extra_kwargs(self): method test_vllm_model_without_api_base_raises (line 571) | def test_vllm_model_without_api_base_raises(self): method test_vllm_model_cost_is_zero (line 578) | def test_vllm_model_cost_is_zero(self): method test_predict_local_model_metrics_known_model (line 590) | def test_predict_local_model_metrics_known_model(self): method test_predict_local_model_metrics_unknown_model (line 596) | def test_predict_local_model_metrics_unknown_model(self): method test_vllm_model_has_quality_score (line 602) | def test_vllm_model_has_quality_score(self): method test_vllm_model_has_latency (line 608) | def test_vllm_model_has_latency(self): method test_vllm_model_unknown_gets_defaults (line 614) | def test_vllm_model_unknown_gets_defaults(self): method test_fuzzy_match_exact_substring (line 622) | def test_fuzzy_match_exact_substring(self): method test_fuzzy_match_normalized (line 627) | def test_fuzzy_match_normalized(self): method test_fuzzy_match_no_match_returns_none (line 632) | def test_fuzzy_match_no_match_returns_none(self): method test_derive_model_flags_llama (line 639) | def test_derive_model_flags_llama(self): method test_derive_model_flags_non_llama (line 644) | def test_derive_model_flags_non_llama(self): method test_derive_model_flags_clip (line 649) | def test_derive_model_flags_clip(self): method test_derive_model_flags_gpt5 (line 654) | def test_derive_model_flags_gpt5(self): method test_derive_model_flags_o_model (line 659) | def test_derive_model_flags_o_model(self): method test_vllm_model_is_vllm (line 666) | def test_vllm_model_is_vllm(self): method test_vllm_llama_model_is_llama (line 671) | def test_vllm_llama_model_is_llama(self): method test_vllm_non_llama_is_not_llama (line 676) | def test_vllm_non_llama_is_not_llama(self): method test_vllm_model_defaults (line 683) | def test_vllm_model_defaults(self): method test_factory_rejects_multiple_vllm_models (line 691) | def test_factory_rejects_multiple_vllm_models(self): method test_generator_passes_vllm_kwargs (line 710) | def test_generator_passes_vllm_kwargs(self, mock_completion, sample_re... FILE: tests/pytest/test_dynamicschema.py function test_dynamicschema_jsonld (line 15) | def test_dynamicschema_jsonld(project_root: Path): function test_dynamicschema_csv (line 20) | def test_dynamicschema_csv(project_root: Path): function test_dynamicschema_json (line 26) | def test_dynamicschema_json(mocker, enron_workload, enron_convert, enron... function test_dynamicschema_yml (line 55) | def test_dynamicschema_yml(mocker, enron_workload, enron_convert, enron_... FILE: tests/pytest/test_execution.py class TestExecution (line 19) | class TestExecution: method test_execute_full_plan (line 54) | def test_execute_full_plan(self, mocker, execution_strategy, dataset, ... FILE: tests/pytest/test_filter.py class TextInputSchema (line 26) | class TextInputSchema(BaseModel): class ImageInputSchema (line 30) | class ImageInputSchema(BaseModel): class AudioInputSchema (line 34) | class AudioInputSchema(BaseModel): function mock_generator_call (line 43) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou... function test_filter (line 61) | def test_filter(mocker, input_schema, physical_op_class, embedding_text_... FILE: tests/pytest/test_generator.py function generate_session_id (line 15) | def generate_session_id() -> str: function question (line 24) | def question(): function output_schema (line 31) | def output_schema(): function test_generator (line 46) | def test_generator(model, question, output_schema): function test_vllm_generator (line 55) | def test_vllm_generator(question, output_schema): class TextInputSchema (line 154) | class TextInputSchema(BaseModel): class ImageInputSchema (line 160) | class ImageInputSchema(BaseModel): class AudioInputSchema (line 166) | class AudioInputSchema(BaseModel): class AnimalOutputSchema (line 179) | class AnimalOutputSchema(BaseModel): function create_input_record (line 468) | def create_input_record(input_schema, modality: str): function get_model_for_provider (line 496) | def get_model_for_provider(provider: str) -> Model: function get_input_schema_for_modality (line 514) | def get_input_schema_for_modality(modality: str): function check_api_key (line 570) | def check_api_key(provider: str) -> bool: function is_modality_supported (line 579) | def is_modality_supported(provider: str, modality: str) -> bool: function within_tolerance (line 584) | def within_tolerance(actual: int, expected: int, tolerance: float = 0.05... function assert_stats_match (line 592) | def assert_stats_match(gen_stats, expected: dict, request_name: str, pro... function test_generator_stats (line 675) | def test_generator_stats(provider, modality): FILE: tests/pytest/test_iter_dataset.py function temp_text_file (line 17) | def temp_text_file(): function temp_text_dir (line 25) | def temp_text_dir(): function list_values (line 38) | def list_values(): function df_values (line 42) | def df_values(): function test_text_dataset (line 46) | def test_text_dataset(temp_text_dir): function test_memory_dataset_list (line 58) | def test_memory_dataset_list(list_values): function test_memory_dataset_df (line 69) | def test_memory_dataset_df(df_values): function test_memory_dataset_copy (line 81) | def test_memory_dataset_copy(): function temp_html_dir (line 89) | def temp_html_dir(tmp_path): function test_html_dataset (line 103) | def test_html_dataset(temp_html_dir): function test_invalid_directory (line 113) | def test_invalid_directory(): FILE: tests/pytest/test_join.py class TextInputSchema (line 21) | class TextInputSchema(BaseModel): class ImageInputSchema (line 25) | class ImageInputSchema(BaseModel): class AudioInputSchema (line 29) | class AudioInputSchema(BaseModel): function create_input_record (line 38) | def create_input_record(schema: type[BaseModel]) -> DataRecord: function mock_generator_call (line 53) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou... function embedding_join_mock_generator_call (line 60) | def embedding_join_mock_generator_call(candidate, fields, right_candidat... function test_join (line 82) | def test_join(mocker, left_input_schema, right_input_schema, physical_op... function test_embedding_join (line 134) | def test_embedding_join(mocker, embedding_text_only_model): FILE: tests/pytest/test_map.py class TextInputSchema (line 25) | class TextInputSchema(BaseModel): class ImageInputSchema (line 29) | class ImageInputSchema(BaseModel): class AudioInputSchema (line 33) | class AudioInputSchema(BaseModel): class OutputSchema (line 42) | class OutputSchema(BaseModel): function mock_generator_call (line 45) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou... function test_map (line 63) | def test_map(mocker, input_schema, physical_op_class, embedding_text_onl... FILE: tests/pytest/test_optimizer.py class TestPrimitives (line 22) | class TestPrimitives: method test_group_id_equality (line 23) | def test_group_id_equality(self, email_schema): class TestOptimizer (line 105) | class TestOptimizer: method test_basic_functionality (line 106) | def test_basic_functionality(self, enron_eval_tiny, opt_strategy): method test_simple_max_quality_convert (line 123) | def test_simple_max_quality_convert(self, enron_eval_tiny, email_schem... method test_simple_min_cost_convert (line 147) | def test_simple_min_cost_convert(self, enron_eval_tiny, email_schema, ... method test_simple_min_time_convert (line 166) | def test_simple_min_time_convert(self, enron_eval_tiny, email_schema, ... method test_simple_vllm_convert (line 185) | def test_simple_vllm_convert(self, enron_eval_tiny, email_schema, opt_... method test_push_down_filter (line 205) | def test_push_down_filter(self, enron_eval_tiny, email_schema, opt_str... method test_push_down_two_filters (line 226) | def test_push_down_two_filters(self, enron_eval_tiny, email_schema, op... method test_small_real_estate_logical_reorder (line 249) | def test_small_real_estate_logical_reorder(self, small_real_estate_wor... method test_real_estate_logical_reorder (line 272) | def test_real_estate_logical_reorder(self, real_estate_workload, opt_s... method test_seven_filters (line 297) | def test_seven_filters(self, enron_eval_tiny, email_schema, opt_strate... class MockSampleBasedCostModel (line 342) | class MockSampleBasedCostModel: method __init__ (line 345) | def __init__(self, operator_to_stats): method get_costed_full_op_ids (line 358) | def get_costed_full_op_ids(self): method __call__ (line 361) | def __call__( class TestParetoOptimizer (line 451) | class TestParetoOptimizer: method test_pareto_optimization_strategy (line 452) | def test_pareto_optimization_strategy(self, workload, policy, operator... FILE: tests/pytest/test_physical.py class SimpleSchema (line 15) | class SimpleSchema(BaseModel): class SimpleSchemaTwo (line 19) | class SimpleSchemaTwo(BaseModel): function test_physical_operator_init (line 24) | def test_physical_operator_init(): function test_physical_operator_equality (line 41) | def test_physical_operator_equality(): function test_physical_operator_str (line 52) | def test_physical_operator_str(): function test_physical_operator_id_generation (line 64) | def test_physical_operator_id_generation(): function test_physical_operator_copy (line 82) | def test_physical_operator_copy(): FILE: tests/pytest/test_records.py class TestSchema (line 11) | class TestSchema(BaseModel): class TestDataRecord (line 16) | class TestDataRecord: method sample_record (line 18) | def sample_record(self): method sample_df (line 24) | def sample_df(self): method test_create_record (line 31) | def test_create_record(self, sample_record): method test_record_equality (line 37) | def test_record_equality(self, sample_record): method test_to_df (line 42) | def test_to_df(self, sample_df): method test_to_df_with_project_cols (line 51) | def test_to_df_with_project_cols(self, sample_df): method test_invalid_attribute (line 60) | def test_invalid_attribute(self, sample_record): method test_to_dict (line 65) | def test_to_dict(self, sample_record): method test_to_json_str (line 71) | def test_to_json_str(self, sample_record): FILE: tests/pytest/test_rules.py function schema (line 12) | def schema(): function base_scan_op (line 19) | def base_scan_op(schema): function test_substitute_methods (line 25) | def test_substitute_methods(base_scan_op): FILE: tests/pytest/test_scan.py class List (line 9) | class List(BaseModel): function test_marshal_and_scan_memory_source (line 13) | def test_marshal_and_scan_memory_source(): FILE: tests/pytest/test_schemas.py class Dog (line 15) | class Dog(BaseModel): class Cat (line 19) | class Cat(BaseModel): function test_schema_equality (line 23) | def test_schema_equality(): function test_get_schema_field_names (line 27) | def test_get_schema_field_names(): function test_project_schema (line 31) | def test_project_schema(): function test_create_schema_from_fields (line 40) | def test_create_schema_from_fields(): function test_create_schema_from_df (line 51) | def test_create_schema_from_df(): function test_union_schemas (line 67) | def test_union_schemas(): FILE: website/src/components/HomepageFeatures/index.tsx type FeatureItem (line 6) | type FeatureItem = { function Feature (line 46) | function Feature({title, Svg, description}: FeatureItem) { function HomepageFeatures (line 60) | function HomepageFeatures(): ReactNode { FILE: website/src/components/ResearchPage/admonitions.tsx type AbstractProps (line 3) | interface AbstractProps { function Abstract (line 9) | function Abstract({ children }: AbstractProps) { FILE: website/src/pages/index.tsx function HomepageHeader (line 11) | function HomepageHeader() { function Home (line 32) | function Home(): ReactNode {