Full Code of mitdbg/palimpzest for AI

main 807ed301c4d2 cached

250 files

3.6 MB

957.3k tokens

1755 symbols

1 requests

Download .txt

Showing preview only (3,828K chars total). Download the full file or copy to clipboard to get everything.

Repository: mitdbg/palimpzest
Branch: main
Commit: 807ed301c4d2
Files: 250
Total size: 3.6 MB

Directory structure:
gitextract_v67onyjf/

├── .github/
│   └── workflows/
│       ├── ci.yaml
│       ├── docs.yaml
│       ├── package.yaml
│       └── test-docs.yaml
├── .gitignore
├── LICENSE
├── README.md
├── abacus-research/
│   ├── README.md
│   ├── README_CUAD_LOCAL.md
│   ├── biodex-ablation.py
│   ├── biodex-demo.py
│   ├── biodex-max-quality-at-cost.py
│   ├── biodex-min-at-fixed-quality.py
│   ├── biodex-pareto-cascades.py
│   ├── biodex-priors-cascades.json
│   ├── biodex-priors.json
│   ├── biodex-revision-priors-maxquality.json
│   ├── biodex-revision-priors-mincost.json
│   ├── cheap-priors-cascades.json
│   ├── cheap-priors.json
│   ├── cuad-demo.py
│   ├── cuad-max-quality-at-cost.py
│   ├── cuad-priors.json
│   ├── cuad_data_loader.py
│   ├── download_embeddings_and_mmqa.sh
│   ├── helper-scripts/
│   │   ├── biodex-gen-index.py
│   │   ├── generate-prior-stats-biodex-first-convert.py
│   │   ├── generate-prior-stats-biodex.py
│   │   ├── generate-prior-stats-cuad.py
│   │   ├── mmqa-baseline.py
│   │   ├── mmqa-gen-image-index.py
│   │   ├── mmqa-gen-image-title-index.py
│   │   ├── mmqa-gen-table-index.py
│   │   └── mmqa-gen-text-index.py
│   ├── mmqa-complex-demo.py
│   ├── mmqa-demo.py
│   ├── run_ablation_study.sh
│   ├── run_biodex.sh
│   ├── run_biodex_cascades.sh
│   ├── run_biodex_cost_threshold.sh
│   ├── run_biodex_min_cost_latency.sh
│   ├── run_biodex_priors.sh
│   ├── run_biodex_priors_constrained.sh
│   ├── run_cuad.sh
│   ├── run_cuad_cost_threshold.sh
│   ├── run_cuad_min_cost_latency.sh
│   ├── run_cuad_priors.sh
│   ├── run_cuad_priors_constrained.sh
│   ├── run_mmqa.sh
│   ├── run_mmqa_complex.sh
│   ├── run_mmqa_complex_min_cost_latency.sh
│   ├── run_mmqa_min_cost_latency.sh
│   ├── score_biodex.py
│   ├── score_cuad.py
│   ├── score_mmqa.py
│   ├── score_mmqa_complex.py
│   └── setup_cuad_data.py
├── demos/
│   ├── audio-demo.py
│   ├── caching-demo.py
│   ├── demo_core.py
│   ├── enron-demo.py
│   ├── image-demo.py
│   ├── join-data/
│   │   └── animal-texts/
│   │       ├── animal1.txt
│   │       ├── animal2.txt
│   │       ├── animal3.txt
│   │       ├── animal4.txt
│   │       ├── animal5.txt
│   │       └── animal6.txt
│   ├── join-demo.py
│   ├── paper-demo.py
│   ├── real-estate-demo.py
│   ├── simple-demo.py
│   └── vllm-demo.py
├── evals/
│   └── quest/
│       └── eval.py
├── pyproject.toml
├── quickstart.ipynb
├── ruff.toml
├── scripts/
│   ├── capture_litellm_stats.py
│   ├── capture_provider_stats.py
│   ├── generate_test_messages.py
│   └── update_model_info.py
├── src/
│   └── palimpzest/
│       ├── __init__.py
│       ├── agents/
│       │   ├── __init__.py
│       │   ├── compute_agents.py
│       │   └── search_agents.py
│       ├── constants.py
│       ├── core/
│       │   ├── __init__.py
│       │   ├── data/
│       │   │   ├── __init__.py
│       │   │   ├── context.py
│       │   │   ├── context_manager.py
│       │   │   ├── dataset.py
│       │   │   ├── index_dataset.py
│       │   │   └── iter_dataset.py
│       │   ├── elements/
│       │   │   ├── __init__.py
│       │   │   ├── filters.py
│       │   │   ├── groupbysig.py
│       │   │   └── records.py
│       │   ├── lib/
│       │   │   ├── __init__.py
│       │   │   └── schemas.py
│       │   └── models.py
│       ├── policy.py
│       ├── prompts/
│       │   ├── __init__.py
│       │   ├── agent_prompts.py
│       │   ├── aggregate_prompts.py
│       │   ├── context_search.py
│       │   ├── convert_prompts.py
│       │   ├── critique_and_refine_prompts.py
│       │   ├── filter_prompts.py
│       │   ├── join_prompts.py
│       │   ├── moa_aggregator_prompts.py
│       │   ├── moa_proposer_prompts.py
│       │   ├── prompt_factory.py
│       │   ├── prompt_manager.py
│       │   ├── split_merge_prompts.py
│       │   ├── split_proposer_prompts.py
│       │   ├── utils.py
│       │   └── validator.py
│       ├── query/
│       │   ├── __init__.py
│       │   ├── execution/
│       │   │   ├── __init__.py
│       │   │   ├── all_sample_execution_strategy.py
│       │   │   ├── execution_strategy.py
│       │   │   ├── execution_strategy_type.py
│       │   │   ├── mab_execution_strategy.py
│       │   │   ├── parallel_execution_strategy.py
│       │   │   └── single_threaded_execution_strategy.py
│       │   ├── generators/
│       │   │   ├── __init__.py
│       │   │   ├── gemini_client.py
│       │   │   └── generators.py
│       │   ├── operators/
│       │   │   ├── __init__.py
│       │   │   ├── aggregate.py
│       │   │   ├── compute.py
│       │   │   ├── convert.py
│       │   │   ├── critique_and_refine.py
│       │   │   ├── distinct.py
│       │   │   ├── filter.py
│       │   │   ├── join.py
│       │   │   ├── limit.py
│       │   │   ├── logical.py
│       │   │   ├── mixture_of_agents.py
│       │   │   ├── physical.py
│       │   │   ├── project.py
│       │   │   ├── rag.py
│       │   │   ├── scan.py
│       │   │   ├── search.py
│       │   │   ├── split.py
│       │   │   └── topk.py
│       │   ├── optimizer/
│       │   │   ├── __init__.py
│       │   │   ├── cost_model.py
│       │   │   ├── optimizer.py
│       │   │   ├── optimizer_strategy.py
│       │   │   ├── optimizer_strategy_type.py
│       │   │   ├── plan.py
│       │   │   ├── primitives.py
│       │   │   ├── rules.py
│       │   │   └── tasks.py
│       │   └── processor/
│       │       ├── __init__.py
│       │       ├── config.py
│       │       ├── query_processor.py
│       │       └── query_processor_factory.py
│       ├── schemabuilder/
│       │   ├── __init__.py
│       │   └── schema_builder.py
│       ├── tools/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── allenpdf.py
│       │   ├── pdfparser.py
│       │   └── skema_tools.py
│       ├── utils/
│       │   ├── __init__.py
│       │   ├── env_helpers.py
│       │   ├── hash_helpers.py
│       │   ├── model_helpers.py
│       │   ├── model_info_helpers.py
│       │   ├── progress.py
│       │   ├── pz_models_information.json
│       │   └── udfs.py
│       └── validator/
│           ├── __init__.py
│           └── validator.py
├── testdata/
│   ├── README.md
│   ├── download-testdata.sh
│   ├── enron-eval-medium-labels.json
│   └── target_matching.csv
├── tests/
│   └── pytest/
│       ├── README.md
│       ├── conftest.py
│       ├── data/
│       │   ├── email_schema.json
│       │   ├── email_schema.yml
│       │   ├── synapse_schema.csv
│       │   └── synapse_schema.jsonld
│       ├── fixtures/
│       │   ├── champion_outputs.py
│       │   ├── datasets.py
│       │   ├── execution_data.py
│       │   ├── expected_physical_plans.py
│       │   ├── expected_qualities.py
│       │   ├── expected_records.py
│       │   ├── models.py
│       │   ├── operator_to_stats.py
│       │   ├── physical_plans.py
│       │   ├── schemas.py
│       │   ├── side_effects.py
│       │   └── workloads.py
│       ├── test_aggregate.py
│       ├── test_convert.py
│       ├── test_dataset.py
│       ├── test_distinct.py
│       ├── test_dynamic_models.py
│       ├── test_dynamicschema.py
│       ├── test_execution.py
│       ├── test_filter.py
│       ├── test_generator.py
│       ├── test_iter_dataset.py
│       ├── test_join.py
│       ├── test_map.py
│       ├── test_optimizer.py
│       ├── test_physical.py
│       ├── test_records.py
│       ├── test_rules.py
│       ├── test_scan.py
│       └── test_schemas.py
└── website/
    ├── .gitignore
    ├── README.md
    ├── blog/
    │   ├── 2024-06-01-palimpzest/
    │   │   ├── bibtex.js
    │   │   └── index.md
    │   ├── authors.yml
    │   └── tags.yml
    ├── docs/
    │   ├── api/
    │   │   └── overview.mdx
    │   ├── getting-started/
    │   │   ├── installation.mdx
    │   │   ├── next-steps.mdx
    │   │   └── quickstart.mdx
    │   ├── intro.mdx
    │   └── user-guide/
    │       ├── dataset.mdx
    │       ├── operators/
    │       │   ├── overview.mdx
    │       │   ├── relational.mdx
    │       │   ├── sem_agg.mdx
    │       │   ├── sem_filter.mdx
    │       │   ├── sem_join.mdx
    │       │   ├── sem_map.mdx
    │       │   └── sem_topk.mdx
    │       ├── optimization.mdx
    │       └── overview.mdx
    ├── docusaurus.config.ts
    ├── package.json
    ├── sidebars.ts
    ├── src/
    │   ├── components/
    │   │   ├── HomepageFeatures/
    │   │   │   ├── index.tsx
    │   │   │   └── styles.module.css
    │   │   └── ResearchPage/
    │   │       └── admonitions.tsx
    │   ├── css/
    │   │   └── custom.css
    │   └── pages/
    │       ├── index.module.css
    │       ├── index.tsx
    │       ├── palimpchat.mdx
    │       └── research.mdx
    ├── static/
    │   └── .nojekyll
    └── tsconfig.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/ci.yaml
================================================
# Merge checks run on every pull request that targets `main`.
# Three independent jobs: the pytest suite, Ruff linting, and a check that the
# `version` line in pyproject.toml differs between the PR base and the PR head.
name: PZ Merge Checks

on:
  pull_request:
    branches:
      - main

jobs:
  # Installs the package with pip, downloads the two "tiny" evaluation archives
  # into testdata/, and runs the tests under tests/pytest.
  # The CI and NO_GEMINI variables are exported before pytest runs; presumably the
  # test suite reads them to skip or adjust tests -- confirm in tests/pytest/conftest.py.
  test:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install dependencies
      run: |
        pip install --upgrade pip
        pip install .

    - name: Download and register testdata
      run: |
        pushd testdata
        wget -nc https://people.csail.mit.edu/gerarvit/PalimpzestData/enron-eval-tiny.tar.gz
        wget -nc https://people.csail.mit.edu/gerarvit/PalimpzestData/real-estate-eval-tiny.tar.gz
        tar -xzf enron-eval-tiny.tar.gz
        tar -xzf real-estate-eval-tiny.tar.gz
        rm *.tar.gz
        popd

    - name: Test with pytest
      env:
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      run: |
        export CI=true
        export NO_GEMINI=true
        pip install pytest
        pytest -v tests/pytest

  # Runs Ruff (>= 0.9.0) over the repository.
  # NOTE(review): the step named "Check code formatting with Ruff" invokes
  # `ruff check` (the linter) a second time rather than `ruff format --check`;
  # it is also marked continue-on-error, so it can never fail the job. Confirm
  # whether a real formatting check was intended.
  lint-and-format:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    - name: Install the code linting and formatting tool Ruff
      run: pip install "ruff>=0.9.0"
    - name: check version
      run: ruff --version
    - name: Lint code with Ruff
      run: ruff check --output-format=github --target-version=py38
    - name: Check code formatting with Ruff
      run: ruff check --no-fix . --target-version=py38
      continue-on-error: true

  # Fails the PR when the version string in pyproject.toml is identical at the
  # PR base commit and the PR head commit. The full history is fetched first
  # so that both commits can be checked out.
  check-version-bump:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Check Version Increased
      run: |
        git fetch --prune --unshallow
        git checkout ${{ github.event.pull_request.base.sha }}
        VERSION=`cat pyproject.toml | grep '^version' | sed -E 's/version.*=.*\"(.*)".*/\1/'`
        echo "Current version is $VERSION"
        git checkout ${{ github.event.pull_request.head.sha }}
        VERSION_PR=`cat pyproject.toml | grep '^version' | sed -E 's/version.*=.*\"(.*)".*/\1/'`
        echo "Version in PR is $VERSION_PR"
        if [ "$VERSION" = "$VERSION_PR" ]; then
          echo "Error: Version has not been bumped"
          exit 1
        fi


================================================
FILE: .github/workflows/docs.yaml
================================================
# Builds the Docusaurus site in website/ on every push to `main` and deploys the
# build output to GitHub Pages.
name: Deploy Docs to GitHub Pages

on:
  push:
    branches:
      - main

permissions:
  contents: write

jobs:
  # Installs the site's npm dependencies (cached via website/package-lock.json),
  # builds the site, writes the custom domain into build/CNAME, and uploads
  # website/build as the Pages artifact.
  build:
    name: Build Docusaurus
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

      - name: Install dependencies
        run: |
          cd website
          npm ci
      - name: Build website
        run: |
          cd website
          npm run build
          echo "palimpzest.org" > build/CNAME
      - name: Upload Build Artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: website/build

  # Publishes the artifact uploaded by the `build` job.
  deploy:
    name: Deploy to GitHub Pages
    needs: build

    # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
    permissions:
      pages: write # to deploy to Pages
      id-token: write # to verify the deployment originates from an appropriate source

    # Deploy to the github-pages environment
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}

    runs-on: ubuntu-latest
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4


================================================
FILE: .github/workflows/package.yaml
================================================
# Builds the Python distribution on every push and pull request to `main`.
# On pushes to `main` only, the built distribution is additionally published to
# PyPI, signed with Sigstore, and attached to a new GitHub Release.
name: package

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  # Builds the package with `python3 -m build` and stores dist/ as an artifact
  # named `python-package-distributions` for the downstream jobs.
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    - name: Build Package
      run: |
        pip install --upgrade pip build
        python3 -m build
    - name: Store the distribution packages
      uses: actions/upload-artifact@v4
      with:
        name: python-package-distributions
        path: dist/

  # Runs only for push events on refs/heads/main (never for pull requests).
  # Downloads the build artifact and publishes it with the PyPA publish action;
  # `id-token: write` is granted to this job.
  publish:
    runs-on: ubuntu-latest
    name: Publish Package
    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
    needs:
    - build
    environment:
      name: pypi
      url: https://pypi.org/p/palimpzest
    permissions:
      id-token: write
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

  # Runs after `publish`. Signs the sdist and wheel, creates a GitHub Release
  # whose tag is the X.Y.Z version parsed from the first file name in dist/,
  # and uploads everything in dist/ to that release.
  github-release:
    name: >-
      Sign distribution w/Sigstore and upload to GitHub Release
    needs:
    - publish
    runs-on: ubuntu-latest
    permissions:
      contents: write
      id-token: write
    steps:
    - name: Download all the dists
      uses: actions/download-artifact@v4
      with:
        name: python-package-distributions
        path: dist/
    - name: Sign the dists with Sigstore
      uses: sigstore/gh-action-sigstore-python@v3.0.0
      with:
        inputs: >-
          ./dist/*.tar.gz
          ./dist/*.whl
    - name: Create GitHub Release
      env:
        GITHUB_TOKEN: ${{ github.token }}
      run: |
        PKG_VERSION=`ls dist/ | head -n 1 | sed -E 's/.*palimpzest-([0-9]+\.[0-9]+\.[0-9]+)-.*/\1/'`
        gh release create "$PKG_VERSION" --repo "$GITHUB_REPOSITORY" --notes ""
    - name: Upload artifact signatures to GitHub Release
      env:
        GITHUB_TOKEN: ${{ github.token }}
      # Upload to GitHub Release using the `gh` CLI.
      # `dist/` contains the built packages, and the
      # sigstore-produced signatures and certificates.
      run: |
        PKG_VERSION=`ls dist/ | head -n 1 | sed -E 's/.*palimpzest-([0-9]+\.[0-9]+\.[0-9]+)-.*/\1/'`
        gh release upload "$PKG_VERSION" dist/** --repo "$GITHUB_REPOSITORY"


================================================
FILE: .github/workflows/test-docs.yaml
================================================
# Verifies on every pull request to `main` that the Docusaurus site in website/
# still builds. Nothing is uploaded or deployed by this workflow.
name: Test Building Docs

on:
  pull_request:
    branches:
      - main

jobs:
  # Installs the site's npm dependencies (cached via website/package-lock.json)
  # and runs the site build.
  test-deploy:
    name: Test deployment
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

      - name: Install dependencies
        run: |
          cd website
          npm ci
      - name: Test build website
        run: |
          cd website
          npm run build


================================================
FILE: .gitignore
================================================
docs/site/
*.zip
.cache/
.env
build/*
docs/build/*
docs/source/generated/*
dist/*
.vscode/*
.idea/*
.chroma
.chroma-biodex
.chroma-mmqa
.ragatouille
plots/
paper-imgs/

# testdata folders and archive files
testdata/enron-tiny.csv
testdata/*/
testdata/*.tar.gz
tests/pytest/data/generator_messages/
scripts/provider_stats/
scripts/litellm_stats/

# python artifacts
*.egg-info
**/__pycache__/

# other
.DS_Store

# logs
*.log

# virtual environment(s)
venv/
uv.lock

# tmp
testdata/maildir/
testdata/real-estate-eval-100.tar

# jupyter
.ipynb_checkpoints/

# evaluation
old-eval-results/
eval-results/

testdata/enron-eval/*.txt

# your zed using open source contributor who only installs in a virtual environment
.venv
.zed
pyrightconfig.json

myenv/
pz-env/

# abacus-research data
abacus-research/cuad-data/*
abacus-research/opt-profiling-data/*
abacus-research/parse-answer-errors/*

# stats
scripts/litellm_stats/
scripts/provider_stats/
tests/pytest/data/generator_messages/


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2024 MIT Data Systems Group

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
![pz-banner](https://palimpzest-workloads.s3.us-east-1.amazonaws.com/palimpzest-cropped.png)

# Palimpzest (PZ)
[![Discord](https://img.shields.io/discord/1245561987480420445?logo=discord)](https://discord.gg/dN85JJ6jaH)
[![Docs](https://img.shields.io/badge/Read_the_Docs-purple?logo=readthedocs)](https://palimpzest.org/)
[![Colab Demo](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Fm8I4yL1az395MsFkQbEIZSmUZs0oGvZ?usp=sharing)
[![PyPI](https://img.shields.io/pypi/v/palimpzest)](https://pypi.org/project/palimpzest/)
[![PyPI - Monthly Downloads](https://img.shields.io/pypi/dm/palimpzest?color=teal)](https://pypi.org/project/palimpzest/)
<!-- [![Paper](https://img.shields.io/badge/Paper-arXiv-b31b1b?logo=arxiv)](https://arxiv.org/pdf/2405.14696) -->
<!-- [![Video](https://img.shields.io/badge/YouTube-Talk-red?logo=youtube)](https://youtu.be/T8VQfyBiki0?si=eiph57DSEkDNbEIu) -->

## 📚 Learn How to Use PZ
Our [full documentation](https://palimpzest.org) is the definitive resource for learning how to use PZ. It contains all of the installation and quickstart materials on this page, as well as user guides, full API documentation (coming soon), and much more.

## 🚀 Getting started
You can find a stable version of the PZ package on PyPI [here](https://pypi.org/project/palimpzest/). To install the package, run:
```bash
$ pip install palimpzest
```

You can also install PZ with [uv](https://docs.astral.sh/uv/) for a faster installation:
```bash
$ uv pip install palimpzest
```

Alternatively, to install the latest version of the package from this repository, you can clone this repository and run the following commands:
```bash
$ git clone git@github.com:mitdbg/palimpzest.git
$ cd palimpzest
$ pip install .
```

## 🙋🏽 Join the PZ Community
We are actively hacking on PZ and would love to have you join our community [![Discord](https://img.shields.io/discord/1245561987480420445?logo=discord)](https://discord.gg/dN85JJ6jaH)

[Our Discord server](https://discord.gg/dN85JJ6jaH) is the best place to:
- Get help with your PZ program(s)
- Give feedback to the maintainers
- Discuss the future direction(s) of the project
- Discuss anything related to data processing with LLMs!

We are eager to learn more about your workloads and use cases, and will take them into consideration in planning our future roadmap.

### 📓 Citation
If you would like to cite our original paper on Palimpzest, please use the following citation:
```
@inproceedings{palimpzestCIDR,
    title={Palimpzest: Optimizing AI-Powered Analytics with Declarative Query Processing},
    author={Liu, Chunwei and Russo, Matthew and Cafarella, Michael and Cao, Lei and Chen, Peter Baile and Chen, Zui and Franklin, Michael and Kraska, Tim and Madden, Samuel and Shahout, Rana and Vitagliano, Gerardo},
    booktitle = {Proceedings of the {{Conference}} on {{Innovative Database Research}} ({{CIDR}})},
    date = 2025,
}
```

If you would like to cite our paper on Palimpzest's optimizer Abacus, please use the following citation:
```
@misc{russo2025abacuscostbasedoptimizersemantic,
      title={Abacus: A Cost-Based Optimizer for Semantic Operator Systems}, 
      author={Matthew Russo and Sivaprasad Sudhir and Gerardo Vitagliano and Chunwei Liu and Tim Kraska and Samuel Madden and Michael Cafarella},
      year={2025},
      eprint={2505.14661},
      archivePrefix={arXiv},
      primaryClass={cs.DB},
      url={https://arxiv.org/abs/2505.14661}, 
}
```


================================================
FILE: abacus-research/README.md
================================================
## Chroma Embeddings and MMQA files
You can download the chroma embeddings we computed for MMQA and BioDEX by executing the following:
```sh
$ ./download_embeddings_and_mmqa.sh
```
This folder also contains questions for the different splits of MMQA -- of which we only use `MMQA_dev.jsonl` for scoring PZ's output. If you need the full MMQA dataset for any reason (e.g. to visualize which images are being retrieved by a pipeline), you can find it here: https://github.com/allenai/multimodalqa/tree/master.

## Table 2
The following scripts create the data for Abacus in Table 2 in our Abacus paper.
- `run_biodex.sh`
- `run_cuad.sh`
- `run_mmqa_complex.sh`

## Table 3
The following scripts create the data for Abacus in Table 3 in our Abacus paper.
- `run_biodex_min_cost_latency.sh`
- `run_cuad_min_cost_latency.sh`
- `run_mmqa_complex_min_cost_latency.sh`

## Figure 6
The following scripts create the data for Figure 6 in our Abacus paper.
- `run_biodex_priors.sh`
- `run_biodex_priors_constrained.sh`
- `run_cuad_priors.sh`
- `run_cuad_priors_constrained.sh`

## Figure 7
The `run_biodex_cost_threshold.sh` and `run_cuad_cost_threshold.sh` scripts create the data for Figure 7 in our Abacus paper.

## Figure 8
The `run_ablation_study.sh` script creates the data for Figure 8 in our Abacus paper.

================================================
FILE: abacus-research/README_CUAD_LOCAL.md
================================================
# CUAD Local Data Setup and Usage

## Setup

Since HuggingFace datasets no longer supports loading scripts, we've created a local data loading solution.

### 1. Download CUAD Data

First, run the setup script to download CUAD data to a local directory:

```bash
python setup_cuad_data.py
```

This will:
- Create a `cuad-data/` directory
- Download the CUAD dataset files (train and test JSON files)
- Download the original dataset script from HuggingFace for reference

### 2. Updated Scripts

The following scripts have been updated to use local data via `cuad_data_loader.py`:

- **cuad-demo.py**
- **cuad-max-quality-at-cost.py**

### 3. Running the Scripts

#### Basic CUAD Demo
```bash
# Make sure OPENAI_API_KEY is set in .env or environment
source ../.env && export OPENAI_API_KEY

# Run from abacus-research directory
seed=0
exp_name="cuad-final-mab-k6-j4-budget50-seed${seed}"
python cuad-demo.py --k 6 --j 4 --sample-budget 50 --seed $seed --exp-name $exp_name --gpt4-mini-only
```

#### Max Quality at Cost
```bash
python cuad-max-quality-at-cost.py --constrained --gpt4-mini-only
```

================================================
FILE: abacus-research/biodex-ablation.py
================================================
import argparse
import json
import os
import time

import chromadb
import datasets
from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction

import palimpzest as pz
from palimpzest.constants import Model

# Column specifications used by the BioDEX pipeline. Each entry is a dict with the
# column's `name`, its Python `type`, and a natural-language `desc`.

# Input columns: the fields of a medical paper as read from the dataset.
biodex_entry_cols = [
    {
        "name": "pmid",
        "type": str,
        "desc": "The PubMed ID of the medical paper",
    },
    {
        "name": "title",
        "type": str,
        "desc": "The title of the medical paper",
    },
    {
        "name": "abstract",
        "type": str,
        "desc": "The abstract of the medical paper",
    },
    {
        "name": "fulltext",
        "type": str,
        "desc": "The full text of the medical paper, which contains information relevant for creating a drug safety report.",
    },
]

# Free-text medical conditions extracted from a paper.
biodex_reactions_cols = [
    {
        "name": "reactions",
        "type": list[str],
        "desc": "The list of all medical conditions experienced by the patient as discussed in the report. Try to provide as many relevant medical conditions as possible.",
    },
]

# Official terms corresponding to the extracted `reactions`.
biodex_reaction_labels_cols = [
    {
        "name": "reaction_labels",
        "type": list[str],
        "desc": "Official terms for medical conditions listed in `reactions`",
    },
]

# Final ranked list of official terms, most relevant first.
biodex_ranked_reactions_labels_cols = [
    {
        "name": "ranked_reaction_labels",
        "type": list[str],
        "desc": "The ranked list of medical conditions experienced by the patient. The most relevant label occurs first in the list. Be sure to rank ALL of the inputs.",
    },
]

class BiodexValidator(pz.Validator):
    """Score BioDEX pipeline outputs against the dataset's ground-truth reactions.

    Ground-truth labels are loaded from the `train` split of `BioDEX/BioDEX-Reactions`
    and stored in `self.pmid_to_label`, keyed by the PubMed ID converted to `str`.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load the labelled entries used for validation.

        Args:
            rp_at_k: the `k` used by `rank_precision_at_k`.
            num_samples: number of dataset entries to keep.
            shuffle: if True, draw `num_samples` entries at random (seeded by `seed`);
                otherwise keep the first `num_samples` entries.
            seed: random state passed to the sampling call when `shuffle` is True.
        """
        super().__init__()

        # read dataset and prepare entries
        dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="train").to_pandas()
        if shuffle:
            dataset = dataset.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        else:
            dataset = dataset.to_dict(orient="records")[:num_samples]

        # compute mapping from pmid --> label (i.e. reactions list)
        self.pmid_to_label = self._compute_pmid_to_label(dataset)

        # store rp_at_k for computing rank-precision at k metric
        self.k = rp_at_k

    def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
        """Build the mapping from PubMed ID (as `str`) to its list of normalized reactions.

        Each entry's comma-separated `reactions` string is split, and every reaction is
        stripped, lower-cased, and has `'` and `^` characters removed.
        """
        pmid_to_label = {}
        for entry in dataset:
            pmid = str(entry["pmid"])
            reactions_lst = [
                reaction.strip().lower().replace("'", "").replace("^", "")
                for reaction in entry["reactions"].split(",")
            ]
            pmid_to_label[pmid] = reactions_lst

        return pmid_to_label

    def rank_precision_at_k(self, preds: list | None, targets: list):
        """Return the rank-precision at `self.k` of `preds` with respect to `targets`.

        The score is the number of the first `self.k` predictions that appear in the
        (de-duplicated, normalized) targets, divided by `min(self.k, len(targets))`.

        Returns 0.0 when `preds` is None. If any exception is raised while scoring,
        `preds` is written to a timestamped file under `rp@k-errors/` and 0.0 is returned.
        """
        if preds is None:
            return 0.0

        try:
            # normalize both lists the same way the labels were normalized
            preds = [pred.strip().lower().replace("'", "").replace("^", "") for pred in preds]
            targets = {target.strip().lower().replace("'", "").replace("^", "") for target in targets}

            # compute rank-precision at k; positions past the end of preds count as misses
            rn = len(targets)
            denom = min(self.k, rn)
            total = 0.0
            for i in range(self.k):
                total += preds[i] in targets if i < len(preds) else 0.0

            return total / denom

        except Exception:
            # best-effort scoring: record the offending predictions and score them as 0
            os.makedirs("rp@k-errors", exist_ok=True)
            ts = time.time()
            with open(f"rp@k-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def term_recall(self, preds: list | None, targets: list):
        """Return the fraction of distinct target words that occur in the predictions.

        Predictions and targets are lower-cased, stripped of `'` and `^`, and split on
        single spaces into words. Both word collections are de-duplicated so that a word
        shared by several target labels is counted once in the numerator and the
        denominator alike.

        Returns 0.0 when `preds` is None. If any exception is raised while scoring,
        `preds` is written to a timestamped file under `term-recall-eval-errors/` and
        0.0 is returned.
        """
        if preds is None:
            return 0.0

        try:
            # normalize terms in each list
            pred_terms = {
                term.strip()
                for pred in preds
                for term in pred.lower().replace("'", "").replace("^", "").split(" ")
            }
            # must be a set: the intersection below is a set, so a list here would let
            # repeated target words inflate the denominator and cap recall below 1.0
            target_terms = {
                term.strip()
                for target in targets
                for term in target.lower().replace("'", "").replace("^", "").split(" ")
            }

            # compute term recall and return
            intersect = pred_terms.intersection(target_terms)
            term_recall = len(intersect) / len(target_terms)

            return term_recall

        except Exception:
            # best-effort scoring: record the offending predictions and score them as 0
            os.makedirs("term-recall-eval-errors", exist_ok=True)
            ts = time.time()
            with open(f"term-recall-eval-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def map_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score the output of a map operation on the first field in `fields`.

        `reactions` is scored with `term_recall`; `ranked_reaction_labels` is scored with
        `rank_precision_at_k`. Targets are looked up by `str(input_record["pmid"])`.

        Raises:
            NotImplementedError: if the first field is neither of the two above.
        """
        field_name = fields[0]
        if field_name == "reactions":
            preds = output.get(field_name)
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.term_recall(preds, targets)
        elif field_name == "ranked_reaction_labels":
            preds = output.get(field_name)
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.rank_precision_at_k(preds, targets)
        else:
            raise NotImplementedError(f"Validator.map_score_fn not implemented for field {field_name}.")

    def topk_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score the output of a top-k operation on the first field in `fields`.

        Only `reaction_labels` is supported; it is scored with `term_recall`.

        Raises:
            NotImplementedError: if the first field is not `reaction_labels`.
        """
        field_name = fields[0]
        if field_name == "reaction_labels":
            preds = output.get(field_name)
            # keys of pmid_to_label are strings, so convert the pmid exactly as
            # map_score_fn does; a non-string pmid would otherwise raise KeyError
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.term_recall(preds, targets)
        else:
            raise NotImplementedError(f"Validator.topk_score_fn not implemented for field {field_name}.")


class BiodexDataset(pz.IterDataset):
    """Iterable dataset over a sample of ``BioDEX/BioDEX-Reactions`` reports.

    Each item exposes the ``pmid``, ``title``, ``abstract`` and ``fulltext`` of one report.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        split: str = "test",
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load ``split`` and keep ``num_samples`` entries.

        With ``shuffle`` the entries are a random sample drawn with ``seed``;
        otherwise they are the first ``num_samples`` entries in dataset order.
        """
        super().__init__(id="biodex", schema=biodex_entry_cols)

        frame = datasets.load_dataset("BioDEX/BioDEX-Reactions", split=split).to_pandas()
        if shuffle:
            frame = frame.sample(n=num_samples, random_state=seed)
            records = frame.to_dict(orient="records")
        else:
            records = frame.to_dict(orient="records")[:num_samples]
        self.dataset = records

        # keep the construction parameters around for inspection
        self.rp_at_k = rp_at_k
        self.num_samples = num_samples
        self.shuffle = shuffle
        self.seed = seed
        self.split = split

    def __len__(self):
        """Return the number of sampled entries."""
        return len(self.dataset)

    def __getitem__(self, idx: int):
        """Return the input fields of the entry at position ``idx`` as a dict."""
        report = self.dataset[idx]
        return {name: report[name] for name in ("pmid", "title", "abstract", "fulltext")}


if __name__ == "__main__":
    # Script entry point: everything below runs only when the file is executed directly.
    # parse arguments
    parser = argparse.ArgumentParser(description="Run a simple demo")
    parser.add_argument(
        "--optimizer-strategy",
        default="pareto",
        type=str,
        help="The optimizer strategy to use. One of pareto or greedy",
    )
    parser.add_argument(
        "--seed",
        default=42,
        type=int,
        help="Seed used to initialize RNG for MAB sampling algorithm",
    )
    parser.add_argument(
        "--k",
        default=10,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    # NOTE(review): the help text below is identical to --k's; confirm what --j controls
    # and reword it accordingly.
    parser.add_argument(
        "--j",
        default=3,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    parser.add_argument(
        "--sample-budget",
        default=100,
        type=int,
        help="Total sample budget in Random Sampling or MAB sentinel execution",
    )
    # defaults to None; the value is interpolated verbatim into the output file names
    parser.add_argument(
        "--exp-name",
        default=None,
        type=str,
        help="The experiment name.",
    )
    # only the literal value 'mincost' is checked later; anything else takes the other branch
    parser.add_argument(
        "--policy",
        default=None,
        type=str,
        help="The policy (one of 'mincost' or 'maxquality').",
    )
    parser.add_argument(
        "--priors-file",
        default=None,
        type=str,
        help="A file with a dictionary mapping physical operator ids to prior belief on their performance",
    )
    args = parser.parse_args()

    # create directory for profiling data
    os.makedirs("ablation-data", exist_ok=True)

    # copy the parsed options into local names used throughout the rest of the script
    seed = args.seed
    k = args.k
    j = args.j
    sample_budget = args.sample_budget
    optimizer_strategy = args.optimizer_strategy
    exp_name = args.exp_name
    # priors stay None when no file is given OR when the given path does not exist
    # (a missing file is ignored silently rather than raising)
    priors = None
    if args.priors_file is not None and os.path.exists(args.priors_file):
        with open(args.priors_file) as f:
            priors = json.load(f)

    # set the optimization policy; constraint set to 80% of mean quality from unconstrained plans (Table 2)
    # any --policy value other than "mincost" (including the default None) selects MaxQualityAtFixedCost
    policy = (
        pz.MinCostAtFixedQuality(min_quality=0.8 * 0.261)
        if args.policy == "mincost"
        else pz.MaxQualityAtFixedCost(max_cost=0.5 * 0.7)
    )
    print(f"USING POLICY: {policy}")

    # warn (but keep going) only when all three provider keys are missing
    if os.getenv("OPENAI_API_KEY") is None and os.getenv("TOGETHER_API_KEY") is None and os.getenv("ANTHROPIC_API_KEY") is None:
        print("WARNING: OPENAI_API_KEY, TOGETHER_API_KEY, and ANTHROPIC_API_KEY are unset")

    # create validator
    validator = BiodexValidator(
        rp_at_k=5,
        num_samples=20,
        shuffle=True,
        seed=seed,
    )

    # create train dataset for validator
    # (same num_samples, shuffle and seed as passed to the validator above)
    train_dataset = BiodexDataset(
        split="train",
        num_samples=20,
        shuffle=True,
        seed=seed,
    )
    # re-bind as a {dataset id: dataset} mapping, the form passed to optimize_and_run below
    train_dataset = {train_dataset.id: train_dataset}

    # load index [text-embedding-3-small]
    chroma_client = chromadb.PersistentClient(".chroma-biodex")
    # os.environ[...] raises KeyError if OPENAI_API_KEY is unset, regardless of the warning above
    openai_ef = OpenAIEmbeddingFunction(
        api_key=os.environ["OPENAI_API_KEY"],
        model_name="text-embedding-3-small",
    )
    index = chroma_client.get_collection("biodex-reaction-terms", embedding_function=openai_ef)

    def search_func(index: chromadb.Collection, query: list[list[float]], k: int) -> dict[str, list[str]]:
        """Retrieve the reaction terms most similar to the query embeddings.

        Args:
            index: collection to search.
            query: passed as the first positional argument of ``index.query``.
            k: maximum number of terms to return after merging all per-query hits.

        Returns:
            ``{"reaction_labels": terms}`` where ``terms`` are unique documents ordered by
            decreasing similarity and truncated to ``k`` entries.
        """
        # NOTE: each query always fetches a fixed 5 neighbours; `k` only caps the merged list
        results = index.query(query, n_results=5)

        # flatten the per-query hits into (similarity, document) pairs
        # similarity is 1 - distance — assumes the collection uses cosine distance; TODO confirm
        scored_docs = [
            (1 - dist, doc)
            for query_docs, query_distances in zip(results["documents"], results["distances"])
            for doc, dist in zip(query_docs, query_distances)
        ]

        # stable sort: documents with equal similarity keep their retrieval order
        scored_docs.sort(key=lambda pair: pair[0], reverse=True)

        # drop duplicates while keeping the first (most similar) occurrence of each document
        ranked_unique = list(dict.fromkeys(doc for _, doc in scored_docs))

        return {"reaction_labels": ranked_unique[:k]}

    # construct plan
    # pipeline: test reports -> extracted `reactions` -> retrieved `reaction_labels` -> `ranked_reaction_labels`
    plan = BiodexDataset(split="test", num_samples=250, shuffle=True, seed=seed)
    plan = plan.sem_map(biodex_reactions_cols)
    plan = plan.sem_topk(
        index=index,
        search_func=search_func,
        search_attr="reactions",
        output_attrs=biodex_reaction_labels_cols,
    )
    plan = plan.sem_map(biodex_ranked_reactions_labels_cols, depends_on=["title", "abstract", "fulltext", "reaction_labels"])

    # set models
    models = [
        Model.GPT_4o,
        Model.GPT_4o_MINI,
        Model.LLAMA3_1_8B,
        Model.LLAMA3_3_70B,
        # Model.MIXTRAL,  # NOTE: only available in tag `abacus-paper-experiments`
        # Model.DEEPSEEK_R1_DISTILL_QWEN_1_5B,
    ]

    # execute pz plan
    config = pz.QueryProcessorConfig(
        policy=policy,
        optimizer_strategy=optimizer_strategy,
        execution_strategy="parallel",
        use_final_op_quality=True,
        max_workers=64,
        available_models=models,
        allow_bonded_query=True,
        allow_critic=True,
        allow_mixtures=True,
        allow_rag_reduction=True,
        progress=True,
        k=k,
        j=j,
        sample_budget=sample_budget,
        # sample_cost_budget=0.10,
        seed=seed,
        exp_name=exp_name,
        priors=priors,
        # priors are disabled exactly when none were loaded
        dont_use_priors=(priors is None),
    )

    data_record_collection = plan.optimize_and_run(config=config, train_dataset=train_dataset, validator=validator)

    # NOTE: exp_name defaults to None, in which case the file names below start with "None-"
    print(data_record_collection.to_df())
    data_record_collection.to_df().to_csv(f"ablation-data/{exp_name}-output.csv", index=False)

    # create filepaths for records and stats
    records_path = f"ablation-data/{exp_name}-records.json"
    stats_path = f"ablation-data/{exp_name}-profiling.json"

    # save record outputs
    # keep only the id and the three pipeline outputs of every record
    record_jsons = []
    for record in data_record_collection:
        record_dict = record.to_dict()
        # the comprehension's `k` is scoped to the comprehension; the outer `k` is not rebound
        record_dict = {
            k: v
            for k, v in record_dict.items()
            if k in ["pmid", "reactions", "reaction_labels", "ranked_reaction_labels"]
        }
        record_jsons.append(record_dict)

    with open(records_path, "w") as f:
        json.dump(record_jsons, f)

    # save statistics
    execution_stats_dict = data_record_collection.execution_stats.to_json()
    with open(stats_path, "w") as f:
        json.dump(execution_stats_dict, f)

    # score output
    # re-draw the test sample with the same n and random_state used for the plan's
    # BiodexDataset above, so the labels line up with the executed reports
    test_dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="test").to_pandas()
    test_dataset = test_dataset.sample(n=250, random_state=seed).to_dict(orient="records")
    # construct mapping from pmid --> label (field, value) pairs
    def compute_target_record(entry):
        """Build the label record for one dataset entry.

        The comma-separated ``reactions`` string is split into terms; each term is
        stripped, lower-cased and has apostrophes and carets removed.
        """
        normalized = []
        for raw_reaction in entry["reactions"].split(","):
            cleaned = raw_reaction.strip().lower()
            cleaned = cleaned.replace("'", "").replace("^", "")
            normalized.append(cleaned)
        return {"ranked_reaction_labels": normalized}

    # keyed by the raw `pmid` value of each sampled test entry (no str() conversion here)
    label_fields_to_values = {
        entry["pmid"]: compute_target_record(entry) for entry in test_dataset
    }

    def rank_precision_at_k(preds: list, targets: list, k: int):
        if preds is None:
            return 0.0

        # lower-case each list
        preds = [pred.lower().replace("'", "").replace("^", "") for pred in preds]
        targets = set([target.lower().replace("'", "").replace("^", "") for target in targets])

        # compute rank-precision at k
        rn = len(targets)
        denom = min(k, rn)
        total = 0.0
        for i in range(k):
            total += preds[i] in targets if i < len(preds) else 0.0

        return total / denom

    def compute_avg_rp_at_k(records, k=5):
        """Average rank-precision@k over ``records``.

        Args:
            records: dicts with a ``pmid`` key and, normally, a ``ranked_reaction_labels`` key.
            k: cut-off passed to ``rank_precision_at_k``.

        Returns:
            ``(average, bad)`` where ``bad`` counts records whose scoring raised; those
            records contribute 0 to the average. An empty ``records`` yields ``(0.0, 0)``.
        """
        # nothing to average: avoid dividing by zero
        if not records:
            return 0.0, 0

        total_rp_at_k = 0
        bad = 0
        for record in records:
            pmid = record['pmid']
            # a record without predictions is scored like a None prediction (0.0)
            preds = record.get('ranked_reaction_labels')
            targets = label_fields_to_values[pmid]['ranked_reaction_labels']
            try:
                total_rp_at_k += rank_precision_at_k(preds, targets, k)
            except Exception:
                # best-effort scoring: count the failure and keep going
                bad += 1

        return total_rp_at_k / len(records), bad

    # score the saved records, then gather the run statistics for reporting
    rp_at_k, bad = compute_avg_rp_at_k(record_jsons, k=5)
    exec_stats = data_record_collection.execution_stats
    final_plan_id = list(exec_stats.plan_stats.keys())[0]
    final_plan_str = exec_stats.plan_strs[final_plan_id]

    # (console label, execution_stats attribute) pairs, in reporting order
    timing_and_cost = [
        ("Optimization time", "optimization_time"),
        ("Optimization cost", "optimization_cost"),
        ("Plan Exec. time", "plan_execution_time"),
        ("Plan Exec. cost", "plan_execution_cost"),
        ("Total Execution time", "total_execution_time"),
        ("Total Execution Cost", "total_execution_cost"),
    ]

    # metrics file: quality first, then timings/costs, then the plan description
    stats_dict = {"rp@5": rp_at_k}
    for _, attr in timing_and_cost:
        stats_dict[attr] = getattr(exec_stats, attr)
    stats_dict["plan_str"] = final_plan_str

    with open(f"ablation-data/{exp_name}-metrics.json", "w") as f:
        json.dump(stats_dict, f)

    # console summary
    print(f"bad: {bad}")
    print("-------")
    print(f"rp@k: {rp_at_k:.5f}")
    for label, attr in timing_and_cost:
        print(f"{label}: {getattr(exec_stats, attr)}")


================================================
FILE: abacus-research/biodex-demo.py
================================================
import argparse
import json
import os
import time

import chromadb
import datasets
from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction

import palimpzest as pz
from palimpzest.constants import Model

# Column specifications (name / type / description) used by this script.

# Input schema: the fields BiodexDataset emits for every report.
biodex_entry_cols = [
    {"name": "pmid", "type": str, "desc": "The PubMed ID of the medical paper"},
    {"name": "title", "type": str, "desc": "The title of the medical paper"},
    {"name": "abstract", "type": str, "desc": "The abstract of the medical paper"},
    {"name": "fulltext", "type": str, "desc": "The full text of the medical paper, which contains information relevant for creating a drug safety report."},
]

# Output of the first sem_map in the plan.
biodex_reactions_cols = [
    {"name": "reactions", "type": list[str], "desc": "The list of all medical conditions experienced by the patient as discussed in the report. Try to provide as many relevant medical conditions as possible."},
]

# Output of the sem_topk retrieval step in the plan.
biodex_reaction_labels_cols = [
    {"name": "reaction_labels", "type": list[str], "desc": "Official terms for medical conditions listed in `reactions`"},
]

# Output of the final sem_map in the plan.
biodex_ranked_reactions_labels_cols = [
    {"name": "ranked_reaction_labels", "type": list[str], "desc": "The ranked list of medical conditions experienced by the patient. The most relevant label occurs first in the list. Be sure to rank ALL of the inputs."},
]

class BiodexValidator(pz.Validator):
    """Validator that scores BioDEX outputs against ground-truth reaction labels.

    Labels come from a sample of the ``train`` split of ``BioDEX/BioDEX-Reactions`` and
    are stored in ``self.pmid_to_label``, keyed by the stringified PubMed ID.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load the labelled sample.

        Args:
            rp_at_k: the ``k`` used by ``rank_precision_at_k``.
            num_samples: number of train entries to keep.
            shuffle: if True, draw a random sample with ``seed``; otherwise keep the
                first ``num_samples`` entries.
            seed: random state for the shuffled sample.
        """
        super().__init__()

        # read dataset and prepare entries
        dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="train").to_pandas()
        if shuffle:
            dataset = dataset.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        else:
            dataset = dataset.to_dict(orient="records")[:num_samples]

        # compute mapping from pmid --> label (i.e. reactions list)
        self.pmid_to_label = self._compute_pmid_to_label(dataset)

        # store rp_at_k for computing rank-precision at k metric
        self.k = rp_at_k

    def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
        """Compute the label for a BioDEX report given its entry in the dataset.

        Returns a dict mapping ``str(pmid)`` to the list of reaction terms, each
        stripped, lower-cased and with apostrophes and carets removed.
        """
        pmid_to_label = {}
        for entry in dataset:
            pmid = str(entry["pmid"])
            reactions_lst = [
                reaction.strip().lower().replace("'", "").replace("^", "")
                for reaction in entry["reactions"].split(",")
            ]
            pmid_to_label[pmid] = reactions_lst

        return pmid_to_label

    def rank_precision_at_k(self, preds: list | None, targets: list):
        """Compute rank-precision at ``self.k`` of ``preds`` against ``targets``.

        Returns 0.0 when ``preds`` is None. Any exception raised while scoring is
        swallowed on purpose: the offending predictions are dumped to a file under
        ``rp@k-errors/`` and 0.0 is returned.
        """
        if preds is None:
            return 0.0

        try:
            # normalize each list
            preds = [pred.strip().lower().replace("'", "").replace("^", "") for pred in preds]
            targets = {target.strip().lower().replace("'", "").replace("^", "") for target in targets}

            # compute rank-precision at k
            rn = len(targets)
            denom = min(self.k, rn)
            total = 0.0
            for i in range(self.k):
                # positions beyond the end of preds count as misses
                total += preds[i] in targets if i < len(preds) else 0.0

            return total / denom

        except Exception:
            # best-effort scoring: record the predictions that could not be scored
            os.makedirs("rp@k-errors", exist_ok=True)
            ts = time.time()
            with open(f"rp@k-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def term_recall(self, preds: list | None, targets: list):
        """Compute the fraction of distinct target words that appear in the predictions.

        Every prediction and target is lower-cased, stripped of apostrophes and carets
        and split on single spaces into words. Both sides are compared as sets, so a
        word repeated across several targets is counted once and a prediction list
        covering every target word scores 1.0.

        Returns 0.0 when ``preds`` is None. Any exception raised while scoring is
        swallowed on purpose: the predictions are dumped to a file under
        ``term-recall-eval-errors/`` and 0.0 is returned.
        """
        if preds is None:
            return 0.0

        try:
            # normalize terms in each list
            pred_terms = {
                term.strip()
                for pred in preds
                for term in pred.lower().replace("'", "").replace("^", "").split(" ")
            }
            # a set (not a list): duplicated words must not inflate the denominator
            target_terms = {
                term.strip()
                for target in targets
                for term in target.lower().replace("'", "").replace("^", "").split(" ")
            }

            # compute term recall and return
            intersect = pred_terms.intersection(target_terms)
            term_recall = len(intersect) / len(target_terms)

            return term_recall

        except Exception:
            # best-effort scoring: record the predictions that could not be scored
            os.makedirs("term-recall-eval-errors", exist_ok=True)
            ts = time.time()
            with open(f"term-recall-eval-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def map_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a map output; only the first entry of ``fields`` is considered.

        ``reactions`` is scored with term recall and ``ranked_reaction_labels`` with
        rank-precision@k.

        Raises:
            NotImplementedError: for any other field.
        """
        field_name = fields[0]
        if field_name == "reactions":
            preds = output.get(field_name)
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.term_recall(preds, targets)
        elif field_name == "ranked_reaction_labels":
            preds = output.get(field_name)
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.rank_precision_at_k(preds, targets)
        else:
            raise NotImplementedError(f"Validator.map_score_fn not implemented for field {field_name}.")

    def topk_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a top-k output; only the first entry of ``fields`` is considered.

        ``reaction_labels`` is scored with term recall.

        Raises:
            NotImplementedError: for any other field.
        """
        field_name = fields[0]
        if field_name == "reaction_labels":
            preds = output.get(field_name)
            # labels are keyed by str(pmid); stringify here too so the lookup matches map_score_fn
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.term_recall(preds, targets)
        else:
            raise NotImplementedError(f"Validator.topk_score_fn not implemented for field {field_name}.")


class BiodexDataset(pz.IterDataset):
    """Iterable dataset over a sample of ``BioDEX/BioDEX-Reactions`` reports.

    Each item exposes the ``pmid``, ``title``, ``abstract`` and ``fulltext`` of one report.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        split: str = "test",
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load ``split`` and keep ``num_samples`` entries.

        With ``shuffle`` the entries are a random sample drawn with ``seed``;
        otherwise they are the first ``num_samples`` entries in dataset order.
        """
        super().__init__(id="biodex", schema=biodex_entry_cols)

        frame = datasets.load_dataset("BioDEX/BioDEX-Reactions", split=split).to_pandas()
        if shuffle:
            frame = frame.sample(n=num_samples, random_state=seed)
            records = frame.to_dict(orient="records")
        else:
            records = frame.to_dict(orient="records")[:num_samples]
        self.dataset = records

        # keep the construction parameters around for inspection
        self.rp_at_k = rp_at_k
        self.num_samples = num_samples
        self.shuffle = shuffle
        self.seed = seed
        self.split = split

    def __len__(self):
        """Return the number of sampled entries."""
        return len(self.dataset)

    def __getitem__(self, idx: int):
        """Return the input fields of the entry at position ``idx`` as a dict."""
        report = self.dataset[idx]
        return {name: report[name] for name in ("pmid", "title", "abstract", "fulltext")}


if __name__ == "__main__":
    # Script entry point: everything below runs only when the file is executed directly.
    # parse arguments
    parser = argparse.ArgumentParser(description="Run a simple demo")
    parser.add_argument("--verbose", default=False, action="store_true", help="Print verbose output")
    parser.add_argument("--progress", default=False, action="store_true", help="Print progress output")
    parser.add_argument("--constrained", default=False, action="store_true", help="Use constrained objective")
    parser.add_argument("--gpt4-mini-only", default=False, action="store_true", help="Use only GPT-4o-mini")
    parser.add_argument(
        "--execution-strategy",
        default="parallel",
        type=str,
        help="The plan executor to use. One of sequential, pipelined, parallel",
    )
    parser.add_argument(
        "--sentinel-execution-strategy",
        default="mab",
        type=str,
        help="The sentinel execution strategy to use. One of mab or random",
    )
    # The help text lists the values the policy selection below actually matches; any
    # other value leaves the default (max-quality) policy in place.
    parser.add_argument(
        "--policy",
        default="maxquality",
        type=str,
        help=(
            "One of 'maxquality', 'mincost', 'minlatency', 'mincostatfixedquality', "
            "'minlatencyatfixedquality' (the last two also need --quality)"
        ),
    )
    parser.add_argument(
        "--val-examples",
        default=25,
        type=int,
        help="Number of validation examples to sample from",
    )
    # NOTE(review): args.model is never read in this script; model selection is driven
    # by --gpt4-mini-only. Confirm whether this option is still needed.
    parser.add_argument(
        "--model",
        default="gpt-4o",
        type=str,
        help="One of 'gpt-4o', 'gpt-4o-mini', 'llama'",
    )
    parser.add_argument(
        "--seed",
        default=42,
        type=int,
        help="Seed used to initialize RNG for MAB sampling algorithm",
    )
    parser.add_argument(
        "--k",
        default=10,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    # NOTE(review): the help text below is identical to --k's; confirm what --j controls
    # and reword it accordingly.
    parser.add_argument(
        "--j",
        default=3,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    parser.add_argument(
        "--sample-budget",
        default=100,
        type=int,
        help="Total sample budget in Random Sampling or MAB sentinel execution",
    )
    # when omitted, a name is derived from the sampling options after parsing
    parser.add_argument(
        "--exp-name",
        default=None,
        type=str,
        help="The experiment name.",
    )
    parser.add_argument(
        "--priors-file",
        default=None,
        type=str,
        help="A file with a dictionary mapping physical operator ids to prior belief on their performance",
    )
    # only consulted together with the *atfixedquality policies
    parser.add_argument(
        "--quality",
        default=None,
        type=float,
        help="Quality threshold",
    )

    args = parser.parse_args()

    # create directory for profiling data
    os.makedirs("opt-profiling-data", exist_ok=True)

    # copy the parsed options into local names used throughout the rest of the script
    verbose = args.verbose
    progress = args.progress
    seed = args.seed
    val_examples = args.val_examples
    k = args.k
    j = args.j
    sample_budget = args.sample_budget
    execution_strategy = args.execution_strategy
    sentinel_execution_strategy = args.sentinel_execution_strategy
    # derive a descriptive experiment name from the sampling options unless one was given
    exp_name = (
        f"biodex-final-{sentinel_execution_strategy}-k{k}-j{j}-budget{sample_budget}-seed{seed}"
        if args.exp_name is None
        else args.exp_name
    )
    # unlike a missing option, a non-existent priors path makes open() raise here
    priors = None
    if args.priors_file is not None:
        with open(args.priors_file) as f:
            priors = json.load(f)

    # set the optimization policy; constraint set to 25% percentile from unconstrained plans
    # default: max quality (cost-capped when --constrained); overridden below by --policy
    policy = pz.MaxQuality() if not args.constrained else pz.MaxQualityAtFixedCost(max_cost=2.250)
    if args.policy == "mincost":
        policy = pz.MinCost()
    elif args.policy == "minlatency":
        policy = pz.MinTime()
    # the fixed-quality policies apply only when --quality is given; without it (or with
    # any unrecognized --policy value) no branch matches and the default above is kept
    elif args.quality is not None and args.policy == "mincostatfixedquality":
        policy = pz.MinCostAtFixedQuality(min_quality=args.quality)
    elif args.quality is not None and args.policy == "minlatencyatfixedquality":
        policy = pz.MinTimeAtFixedQuality(min_quality=args.quality)
    print(f"USING POLICY: {policy}")

    # warn (but keep going) only when all three provider keys are missing
    if os.getenv("OPENAI_API_KEY") is None and os.getenv("TOGETHER_API_KEY") is None and os.getenv("ANTHROPIC_API_KEY") is None:
        print("WARNING: OPENAI_API_KEY, TOGETHER_API_KEY, and ANTHROPIC_API_KEY are unset")

    # create validator
    validator = BiodexValidator(
        rp_at_k=5,
        num_samples=val_examples,
        shuffle=True,
        seed=seed,
    )

    # create train dataset for validator
    # (same num_samples, shuffle and seed as passed to the validator above)
    train_dataset = BiodexDataset(
        split="train",
        num_samples=val_examples,
        shuffle=True,
        seed=seed,
    )
    # re-bind as a {dataset id: dataset} mapping, the form passed to optimize_and_run below
    train_dataset = {train_dataset.id: train_dataset}

    # load index [text-embedding-3-small]
    chroma_client = chromadb.PersistentClient(".chroma-biodex")
    # os.environ[...] raises KeyError if OPENAI_API_KEY is unset, regardless of the warning above
    openai_ef = OpenAIEmbeddingFunction(
        api_key=os.environ["OPENAI_API_KEY"],
        model_name="text-embedding-3-small",
    )
    index = chroma_client.get_collection("biodex-reaction-terms", embedding_function=openai_ef)

    def search_func(index: chromadb.Collection, query: list[list[float]], k: int) -> dict[str, list[str]]:
        """Retrieve the reaction terms most similar to the query embeddings.

        Args:
            index: collection to search.
            query: passed as the first positional argument of ``index.query``.
            k: maximum number of terms to return after merging all per-query hits.

        Returns:
            ``{"reaction_labels": terms}`` where ``terms`` are unique documents ordered by
            decreasing similarity and truncated to ``k`` entries.
        """
        # NOTE: each query always fetches a fixed 5 neighbours; `k` only caps the merged list
        results = index.query(query, n_results=5)

        # flatten the per-query hits into (similarity, document) pairs
        # similarity is 1 - distance — assumes the collection uses cosine distance; TODO confirm
        scored_docs = [
            (1 - dist, doc)
            for query_docs, query_distances in zip(results["documents"], results["distances"])
            for doc, dist in zip(query_docs, query_distances)
        ]

        # stable sort: documents with equal similarity keep their retrieval order
        scored_docs.sort(key=lambda pair: pair[0], reverse=True)

        # drop duplicates while keeping the first (most similar) occurrence of each document
        ranked_unique = list(dict.fromkeys(doc for _, doc in scored_docs))

        return {"reaction_labels": ranked_unique[:k]}

    # construct plan
    # pipeline: test reports -> extracted `reactions` -> retrieved `reaction_labels` -> `ranked_reaction_labels`
    plan = BiodexDataset(split="test", num_samples=250, shuffle=True, seed=seed)
    plan = plan.sem_map(biodex_reactions_cols)
    plan = plan.sem_topk(
        index=index,
        search_func=search_func,
        search_attr="reactions",
        output_attrs=biodex_reaction_labels_cols,
    )
    plan = plan.sem_map(biodex_ranked_reactions_labels_cols, depends_on=["title", "abstract", "fulltext", "reaction_labels"])

    # set models
    # --gpt4-mini-only restricts the optimizer to a single model
    models = [Model.GPT_4o_MINI] if args.gpt4_mini_only else [
        Model.GPT_4o,
        Model.GPT_4o_MINI,
        Model.LLAMA3_1_8B,
        Model.LLAMA3_3_70B,
        # Model.MIXTRAL,  # NOTE: only available in tag `abacus-paper-experiments`
        Model.DEEPSEEK_R1_DISTILL_QWEN_1_5B,
    ]

    # execute pz plan
    config = pz.QueryProcessorConfig(
        policy=policy,
        optimizer_strategy="pareto",
        sentinel_execution_strategy=sentinel_execution_strategy,
        execution_strategy=execution_strategy,
        use_final_op_quality=True,
        max_workers=64,
        verbose=verbose,
        available_models=models,
        allow_bonded_query=True,
        allow_critic=True,
        allow_mixtures=True,
        allow_rag_reduction=True,
        progress=progress,
        k=k,
        j=j,
        sample_budget=sample_budget,
        # sample_cost_budget=0.10,
        seed=seed,
        exp_name=exp_name,
        priors=priors,
    )

    data_record_collection = plan.optimize_and_run(config=config, train_dataset=train_dataset, validator=validator)

    print(data_record_collection.to_df())
    data_record_collection.to_df().to_csv(f"opt-profiling-data/{exp_name}-output.csv", index=False)

    # create filepaths for records and stats
    records_path = f"opt-profiling-data/{exp_name}-records.json"
    stats_path = f"opt-profiling-data/{exp_name}-profiling.json"

    # save record outputs
    # keep only the id and the three pipeline outputs of every record
    record_jsons = []
    for record in data_record_collection:
        record_dict = record.to_dict()
        # the comprehension's `k` is scoped to the comprehension; the outer `k` is not rebound
        record_dict = {
            k: v
            for k, v in record_dict.items()
            if k in ["pmid", "reactions", "reaction_labels", "ranked_reaction_labels"]
        }
        record_jsons.append(record_dict)

    with open(records_path, "w") as f:
        json.dump(record_jsons, f)

    # save statistics
    execution_stats_dict = data_record_collection.execution_stats.to_json()
    with open(stats_path, "w") as f:
        json.dump(execution_stats_dict, f)

    # score output
    # re-draw the test sample with the same n and random_state used for the plan's
    # BiodexDataset above, so the labels line up with the executed reports
    test_dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="test").to_pandas()
    test_dataset = test_dataset.sample(n=250, random_state=seed).to_dict(orient="records")

    # construct mapping from pmid --> label (field, value) pairs
    def compute_target_record(entry):
        """Build the label record for one dataset entry.

        The comma-separated ``reactions`` string is split into terms; each term is
        stripped, lower-cased and has apostrophes and carets removed.
        """
        normalized = []
        for raw_reaction in entry["reactions"].split(","):
            cleaned = raw_reaction.strip().lower()
            cleaned = cleaned.replace("'", "").replace("^", "")
            normalized.append(cleaned)
        return {"ranked_reaction_labels": normalized}

    # keyed by the raw `pmid` value of each sampled test entry (no str() conversion here)
    label_fields_to_values = {
        entry["pmid"]: compute_target_record(entry) for entry in test_dataset
    }

    def rank_precision_at_k(preds: list, targets: list, k: int):
        if preds is None:
            return 0.0

        # lower-case each list
        preds = [pred.lower().replace("'", "").replace("^", "") for pred in preds]
        targets = set([target.lower().replace("'", "").replace("^", "") for target in targets])

        # compute rank-precision at k
        rn = len(targets)
        denom = min(k, rn)
        total = 0.0
        for i in range(k):
            total += preds[i] in targets if i < len(preds) else 0.0

        return total / denom

    def compute_avg_rp_at_k(records, k=5):
        """Average rank-precision@k over ``records``.

        Args:
            records: dicts with a ``pmid`` key and, normally, a ``ranked_reaction_labels`` key.
            k: cut-off passed to ``rank_precision_at_k``.

        Returns:
            ``(average, bad)`` where ``bad`` counts records whose scoring raised; those
            records contribute 0 to the average. An empty ``records`` yields ``(0.0, 0)``.
        """
        # nothing to average: avoid dividing by zero
        if not records:
            return 0.0, 0

        total_rp_at_k = 0
        bad = 0
        for record in records:
            pmid = record['pmid']
            # a record without predictions is scored like a None prediction (0.0)
            preds = record.get('ranked_reaction_labels')
            targets = label_fields_to_values[pmid]['ranked_reaction_labels']
            try:
                total_rp_at_k += rank_precision_at_k(preds, targets, k)
            except Exception:
                # best-effort scoring: count the failure and keep going
                bad += 1

        return total_rp_at_k / len(records), bad

    # score the saved records, then gather the run statistics for reporting
    rp_at_k, bad = compute_avg_rp_at_k(record_jsons, k=5)
    exec_stats = data_record_collection.execution_stats
    final_plan_id = list(exec_stats.plan_stats.keys())[0]
    final_plan_str = exec_stats.plan_strs[final_plan_id]

    # (console label, execution_stats attribute) pairs, in reporting order
    timing_and_cost = [
        ("Optimization time", "optimization_time"),
        ("Optimization cost", "optimization_cost"),
        ("Plan Exec. time", "plan_execution_time"),
        ("Plan Exec. cost", "plan_execution_cost"),
        ("Total Execution time", "total_execution_time"),
        ("Total Execution Cost", "total_execution_cost"),
    ]

    # metrics file: quality first, then timings/costs, then the plan description
    stats_dict = {"rp@5": rp_at_k}
    for _, attr in timing_and_cost:
        stats_dict[attr] = getattr(exec_stats, attr)
    stats_dict["plan_str"] = final_plan_str

    with open(f"opt-profiling-data/{exp_name}-metrics.json", "w") as f:
        json.dump(stats_dict, f)

    # console summary
    print(f"bad: {bad}")
    print("-------")
    print(f"rp@k: {rp_at_k:.5f}")
    for label, attr in timing_and_cost:
        print(f"{label}: {getattr(exec_stats, attr)}")


================================================
FILE: abacus-research/biodex-max-quality-at-cost.py
================================================
import argparse
import json
import os
import time

import chromadb
import datasets
from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction

# from ragatouille import RAGPretrainedModel
import palimpzest as pz
from palimpzest.constants import Model
from palimpzest.policy import MaxQuality, MaxQualityAtFixedCost

# Column specifications; every entry gives a column's name, type and description.

# Fields describing one medical paper.
biodex_entry_cols = [
    {"name": "pmid", "type": str, "desc": "The PubMed ID of the medical paper"},
    {"name": "title", "type": str, "desc": "The title of the medical paper"},
    {"name": "abstract", "type": str, "desc": "The abstract of the medical paper"},
    {"name": "fulltext", "type": str, "desc": "The full text of the medical paper, which contains information relevant for creating a drug safety report."},
]

# Free-text list of medical conditions reported for the patient.
biodex_reactions_cols = [
    {"name": "reactions", "type": list[str], "desc": "The list of all medical conditions experienced by the patient as discussed in the report. Try to provide as many relevant medical conditions as possible."},
]

# Official terms corresponding to the entries of `reactions`.
biodex_reaction_labels_cols = [
    {"name": "reaction_labels", "type": list[str], "desc": "Official terms for medical conditions listed in `reactions`"},
]

# The same labels ranked by relevance, most relevant first.
biodex_ranked_reactions_labels_cols = [
    {"name": "ranked_reaction_labels", "type": list[str], "desc": "The ranked list of medical conditions experienced by the patient. The most relevant label occurs first in the list. Be sure to rank ALL of the inputs."},
]

class BiodexValidator(pz.Validator):
    """Score BioDEX pipeline outputs against ground-truth reaction lists.

    Ground truth is read from the ``train`` split of ``BioDEX/BioDEX-Reactions``.
    Each entry's comma-separated ``reactions`` string is normalized into a list
    of labels and stored under the stringified ``pmid`` of the entry.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load the labelled entries and build the pmid --> label lookup.

        Args:
            rp_at_k: Cutoff ``k`` used by :meth:`rank_precision_at_k`.
            num_samples: Number of training entries to keep.
            shuffle: If True, draw a seeded random sample of ``num_samples``
                entries; otherwise keep the first ``num_samples`` entries.
            seed: Random state used for sampling when ``shuffle`` is True.
        """
        super().__init__()

        # read dataset and prepare entries
        dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="train").to_pandas()
        if shuffle:
            dataset = dataset.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        else:
            dataset = dataset.to_dict(orient="records")[:num_samples]

        # compute mapping from pmid --> label (i.e. reactions list)
        self.pmid_to_label = self._compute_pmid_to_label(dataset)

        # store rp_at_k for computing rank-precision at k metric
        self.k = rp_at_k

    @staticmethod
    def _normalize(text: str) -> str:
        """Lower-case ``text`` and drop apostrophes and carets (whitespace is untouched)."""
        return text.lower().replace("'", "").replace("^", "")

    def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
        """Map each entry's stringified pmid to its normalized list of reactions."""
        return {
            str(entry["pmid"]): [
                self._normalize(reaction.strip())
                for reaction in entry["reactions"].split(",")
            ]
            for entry in dataset
        }

    def rank_precision_at_k(self, preds: list | None, targets: list):
        """Return rank-precision@k of ``preds`` against ``targets``.

        Counts how many of the first ``self.k`` predictions appear in the target
        set and divides by ``min(self.k, number of distinct targets)``.

        Returns 0.0 when ``preds`` is None. Any exception raised while scoring
        (e.g. non-string predictions, or an empty target list) is treated as a
        zero score and the offending predictions are dumped under ``rp@k-errors/``.
        """
        if preds is None:
            return 0.0

        try:
            # normalize each list
            preds = [self._normalize(pred.strip()) for pred in preds]
            targets = {self._normalize(target.strip()) for target in targets}

            # compute rank-precision at k
            denom = min(self.k, len(targets))
            hits = sum(pred in targets for pred in preds[: self.k])

            return hits / denom

        except Exception:
            # best-effort scoring: record the bad predictions and score them as 0
            os.makedirs("rp@k-errors", exist_ok=True)
            ts = time.time()
            with open(f"rp@k-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def term_recall(self, preds: list | None, targets: list):
        """Return the fraction of distinct target terms that occur in ``preds``.

        Both lists are normalized and split on single spaces into terms. The
        denominator is the number of *distinct* target terms, so predicting every
        target term yields exactly 1.0 even when targets share words.

        Returns 0.0 when ``preds`` is None. Any exception raised while scoring
        (e.g. an empty target list) is treated as a zero score and the offending
        predictions are dumped under ``term-recall-eval-errors/``.
        """
        if preds is None:
            return 0.0

        try:
            # normalize terms in each list
            pred_terms = {
                term.strip()
                for pred in preds
                for term in self._normalize(pred).split(" ")
            }
            # a set (not a list): the intersection below is de-duplicated, so the
            # denominator must be de-duplicated as well
            target_terms = {
                term.strip()
                for target in targets
                for term in self._normalize(target).split(" ")
            }

            # compute term recall and return
            return len(pred_terms & target_terms) / len(target_terms)

        except Exception:
            # best-effort scoring: record the bad predictions and score them as 0
            os.makedirs("term-recall-eval-errors", exist_ok=True)
            ts = time.time()
            with open(f"term-recall-eval-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def map_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a map output: term recall for ``reactions``, rp@k for ``ranked_reaction_labels``.

        Only ``fields[0]`` is inspected.

        Raises:
            NotImplementedError: if ``fields[0]`` is any other field.
            KeyError: if the record's pmid has no ground-truth label.
        """
        field_name = fields[0]
        if field_name not in ("reactions", "ranked_reaction_labels"):
            raise NotImplementedError(f"Validator.map_score_fn not implemented for field {field_name}.")

        preds = output.get(field_name)
        targets = self.pmid_to_label[str(input_record["pmid"])]
        if field_name == "reactions":
            return self.term_recall(preds, targets)
        return self.rank_precision_at_k(preds, targets)

    def topk_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a top-k output: term recall for ``reaction_labels``.

        Only ``fields[0]`` is inspected.

        Raises:
            NotImplementedError: if ``fields[0]`` is any other field.
            KeyError: if the record's pmid has no ground-truth label.
        """
        field_name = fields[0]
        if field_name != "reaction_labels":
            raise NotImplementedError(f"Validator.topk_score_fn not implemented for field {field_name}.")

        preds = output.get(field_name)
        # keys of pmid_to_label are stringified pmids, so coerce here as map_score_fn does
        targets = self.pmid_to_label[str(input_record["pmid"])]
        return self.term_recall(preds, targets)


class BiodexDataset(pz.IterDataset):
    """Iterable dataset over a slice of the ``BioDEX/BioDEX-Reactions`` corpus.

    Each item exposes the ``pmid``, ``title``, ``abstract`` and ``fulltext`` of one
    paper, matching ``biodex_entry_cols``.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        split: str = "test",
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load ``split`` and keep ``num_samples`` entries.

        Args:
            rp_at_k: Stored on the instance; not otherwise used by this class.
            num_samples: Number of entries to keep.
            split: Name of the dataset split to load.
            shuffle: If True, draw a seeded random sample of ``num_samples``
                entries; otherwise keep the first ``num_samples`` entries.
            seed: Random state used for sampling when ``shuffle`` is True.
        """
        super().__init__(id="biodex", schema=biodex_entry_cols)

        self.dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split=split).to_pandas()
        if shuffle:
            self.dataset = self.dataset.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        else:
            self.dataset = self.dataset.to_dict(orient="records")[:num_samples]

        self.rp_at_k = rp_at_k
        self.num_samples = num_samples
        self.shuffle = shuffle
        self.seed = seed
        self.split = split

    def __len__(self):
        """Return the number of retained entries."""
        return len(self.dataset)

    def __getitem__(self, idx: int):
        """Return entry ``idx`` as a flat dict keyed by the schema's field names.

        The item is flat (``{"pmid": ..., "title": ..., ...}``) rather than nested
        under a ``"fields"`` key, because the validator reads ``input_record["pmid"]``
        and the exported records are filtered by top-level field name.
        """
        # get entry
        entry = self.dataset[idx]

        # create item with one key per input field
        return {name: entry[name] for name in ("pmid", "title", "abstract", "fulltext")}


if __name__ == "__main__":
    # Script entry point. Every option below has a default, so the script can be
    # launched without any command-line arguments.
    # parse arguments
    parser = argparse.ArgumentParser(description="Run a simple demo")
    parser.add_argument("--verbose", default=False, action="store_true", help="Print verbose output")
    parser.add_argument("--progress", default=False, action="store_true", help="Print progress output")
    parser.add_argument(
        "--execution-strategy",
        default="parallel",
        type=str,
        help="The plan executor to use. One of sequential, pipelined, parallel",
    )
    parser.add_argument(
        "--sentinel-execution-strategy",
        default="mab",
        type=str,
        help="The sentinel execution strategy to use. One of mab or random",
    )
    parser.add_argument(
        "--optimizer-strategy",
        default="pareto",
        type=str,
        help="The optimizer to use. One of pareto or greedy",
    )
    # also used as the sample size for both the validator and the training dataset
    parser.add_argument(
        "--val-examples",
        default=30,
        type=int,
        help="Number of validation examples to sample from",
    )
    # also used as the pandas random_state when sampling dataset rows
    parser.add_argument(
        "--seed",
        default=42,
        type=int,
        help="Seed used to initialize RNG for MAB sampling algorithm",
    )
    parser.add_argument(
        "--k",
        default=10,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    # NOTE(review): this help text is identical to --k's and looks copy-pasted;
    # confirm what `j` controls in QueryProcessorConfig and update the text.
    parser.add_argument(
        "--j",
        default=3,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    parser.add_argument(
        "--sample-budget",
        default=100,
        type=int,
        help="Total sample budget in Random Sampling or MAB sentinel execution",
    )
    # values >= 999 switch the policy to unconstrained MaxQuality (see policy selection below)
    parser.add_argument(
        "--cost",
        default=1.0,
        type=float,
        help="The cost budget for the optimization",
    )
    # when omitted, a name is derived from the strategy, k, j, budget and seed
    parser.add_argument(
        "--exp-name",
        default=None,
        type=str,
        help="The experiment name.",
    )
    # path to a JSON file; its parsed content is forwarded as `priors` to the config
    parser.add_argument(
        "--priors-file",
        default=None,
        type=str,
        help="A file with a dictionary mapping physical operator ids to prior belief on their performance",
    )

    args = parser.parse_args()

    # create directory for profiling data
    os.makedirs("max-quality-at-cost-data", exist_ok=True)

    # unpack CLI arguments into locals used throughout the rest of the script
    verbose = args.verbose
    progress = args.progress
    seed = args.seed
    val_examples = args.val_examples
    k = args.k
    j = args.j
    sample_budget = args.sample_budget
    execution_strategy = args.execution_strategy
    sentinel_execution_strategy = args.sentinel_execution_strategy
    optimizer_strategy = args.optimizer_strategy
    cost = args.cost
    # the experiment name prefixes every output file written below
    exp_name = (
        f"biodex-strategy-{optimizer_strategy}-k{k}-j{j}-budget{sample_budget}-seed{seed}"
        if args.exp_name is None
        else args.exp_name
    )
    # optional priors, parsed from JSON and passed through to the config unchanged
    priors = None
    if args.priors_file is not None:
        with open(args.priors_file) as f:
            priors = json.load(f)
    print(f"EXPERIMENT NAME: {exp_name}")

    # only warns when ALL three keys are missing
    # NOTE(review): OPENAI_API_KEY is read unconditionally via os.environ[...] when the
    # embedding function is built below, so the script raises KeyError if it is unset.
    if os.getenv("OPENAI_API_KEY") is None and os.getenv("TOGETHER_API_KEY") is None and os.getenv("ANTHROPIC_API_KEY") is None:
        print("WARNING: OPENAI_API_KEY, TOGETHER_API_KEY, and ANTHROPIC_API_KEY are unset")

    # create validator
    validator = BiodexValidator(
        rp_at_k=5,
        num_samples=val_examples,
        shuffle=True,
        seed=seed,
    )

    # create validation data source
    # (same split, sample size and seed as the validator — presumably so that both
    # select the same training rows; verify if the sampling logic changes)
    train_dataset = BiodexDataset(
        split="train",
        num_samples=val_examples,
        shuffle=True,
        seed=seed,
    )
    # rebind to a dict keyed by the dataset id, the form passed to optimize_and_run below
    train_dataset = {train_dataset.id: train_dataset}

    # load index [text-embedding-3-small]
    chroma_client = chromadb.PersistentClient(".chroma-biodex")
    openai_ef = OpenAIEmbeddingFunction(
        api_key=os.environ["OPENAI_API_KEY"],
        model_name="text-embedding-3-small",
    )
    # get_collection (not get_or_create): the collection is expected to exist already
    index = chroma_client.get_collection("biodex-reaction-terms", embedding_function=openai_ef)

    def search_func(index: chromadb.Collection, query: list[list[float]], k: int) -> dict[str, list[str]]:
        """Retrieve the ``k`` most similar, de-duplicated reaction terms for ``query``.

        Args:
            index: Collection to search.
            query: One embedding per query; each retrieves up to 5 documents.
            k: Maximum number of terms to return.

        Returns:
            ``{"reaction_labels": [...]}`` with terms ordered by decreasing
            similarity across all queries.
        """
        # execute query with embeddings (a fixed 5 results per query embedding)
        results = index.query(query, n_results=5)

        # flatten the per-query results into (term, similarity) pairs; similarity is
        # taken as 1 - distance (assumes the collection uses cosine distance — TODO confirm)
        scored_terms = [
            (doc, 1 - dist)
            for query_docs, query_distances in zip(results["documents"], results["distances"])
            for doc, dist in zip(query_docs, query_distances)
        ]

        # sort the results by similarity score (stable, highest first)
        scored_terms.sort(key=lambda pair: pair[1], reverse=True)

        # remove duplicates, keeping the first (i.e. most similar) occurrence of each term
        unique_terms = list(dict.fromkeys(doc for doc, _ in scored_terms))

        # return the top-k similar results
        return {"reaction_labels": unique_terms[:k]}

    # construct plan
    # NOTE: num_samples=250 and the seed must stay in sync with the test-set sample
    # drawn for scoring at the end of this section (n=250, random_state=seed).
    plan = BiodexDataset(split="test", num_samples=250, shuffle=True, seed=seed)
    # step 1: extract free-text reactions from each paper
    plan = plan.sem_map(biodex_reactions_cols)
    # step 2: look up candidate official terms for the extracted reactions in the index
    plan = plan.sem_topk(
        index=index,
        search_func=search_func,
        search_attr="reactions",
        output_attrs=biodex_reaction_labels_cols,
    )
    # step 3: rank the candidate labels using the paper text
    plan = plan.sem_map(biodex_ranked_reactions_labels_cols, depends_on=["title", "abstract", "fulltext", "reaction_labels"])

    # set policy
    # a cost budget of 999 or more is treated as "no budget" and selects MaxQuality
    policy = MaxQualityAtFixedCost(max_cost=cost) if cost < 999 else MaxQuality()

    # execute pz plan
    config = pz.QueryProcessorConfig(
        policy=policy,
        optimizer_strategy=optimizer_strategy,
        sentinel_execution_strategy=sentinel_execution_strategy,
        execution_strategy=execution_strategy,
        use_final_op_quality=True,
        max_workers=64,
        verbose=verbose,
        available_models=[
            Model.GPT_4o,
            Model.GPT_4o_MINI,
            Model.LLAMA3_1_8B,
            Model.LLAMA3_3_70B,
            # Model.MIXTRAL, # NOTE: only available in tag `abacus-paper-experiments`
            Model.DEEPSEEK_R1_DISTILL_QWEN_1_5B,
        ],
        allow_bonded_query=True,
        allow_critic=True,
        allow_mixtures=True,
        allow_rag_reduction=True,
        progress=progress,
        k=k,
        j=j,
        sample_budget=sample_budget,
        seed=seed,
        exp_name=exp_name,
        priors=priors,
    )

    data_record_collection = plan.optimize_and_run(config=config, train_dataset=train_dataset, validator=validator)

    # dump the full output table (to_df() is evaluated twice: once to print, once to save)
    print(data_record_collection.to_df())
    data_record_collection.to_df().to_csv(f"max-quality-at-cost-data/{exp_name}-output.csv", index=False)

    # create filepaths for records and stats
    records_path = f"max-quality-at-cost-data/{exp_name}-records.json"
    stats_path = f"max-quality-at-cost-data/{exp_name}-profiling.json"

    # save record outputs
    record_jsons = []
    for record in data_record_collection:
        record_dict = record.to_dict()
        # keep only the fields needed for scoring; the comprehension's `k` is local to
        # the comprehension and does not overwrite the CLI `k` above
        record_dict = {
            k: v
            for k, v in record_dict.items()
            if k in ["pmid", "reactions", "reaction_labels", "ranked_reaction_labels"]
        }
        record_jsons.append(record_dict)

    with open(records_path, "w") as f:
        json.dump(record_jsons, f)

    # save statistics
    execution_stats_dict = data_record_collection.execution_stats.to_json()
    with open(stats_path, "w") as f:
        json.dump(execution_stats_dict, f)

    # score output
    # re-draw the test sample with the same n and seed that were used to build the plan's dataset
    test_dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="test").to_pandas()
    test_dataset = test_dataset.sample(n=250, random_state=seed).to_dict(orient="records")

    # construct mapping from pmid --> label (field, value) pairs
    def compute_target_record(entry):
        """Build the ground-truth record for one dataset entry.

        The entry's comma-separated ``reactions`` string is split, and each piece
        is stripped, lower-cased and cleared of apostrophes and carets.
        """
        cleaned_reactions = []
        for raw_reaction in entry["reactions"].split(","):
            label = raw_reaction.strip().lower()
            label = label.replace("'", "").replace("^", "")
            cleaned_reactions.append(label)

        return {"ranked_reaction_labels": cleaned_reactions}

    # Ground-truth lookup keyed by each sampled test entry's raw `pmid` value (not
    # coerced to str); values are the dicts built by compute_target_record.
    label_fields_to_values = {
        entry["pmid"]: compute_target_record(entry) for entry in test_dataset
    }

    def rank_precision_at_k(preds: list, targets: list, k: int):
        """Return rank-precision@k of ``preds`` against ``targets``.

        Counts how many of the first ``k`` predictions appear in the target set and
        divides by ``min(k, number of distinct targets)``. Predictions and targets
        are stripped, lower-cased and cleared of apostrophes and carets before
        comparison, so surrounding whitespace in a prediction does not cause a miss.

        Returns 0.0 when ``preds`` is None.

        Raises:
            ZeroDivisionError: if ``targets`` is empty (or ``k`` is 0).
            AttributeError: if a prediction or target is not a string.
        """
        if preds is None:
            return 0.0

        # normalize each list (targets become a set for O(1) membership tests)
        preds = [pred.strip().lower().replace("'", "").replace("^", "") for pred in preds]
        targets = {target.strip().lower().replace("'", "").replace("^", "") for target in targets}

        # compute rank-precision at k
        denom = min(k, len(targets))
        hits = sum(pred in targets for pred in preds[:k])

        return hits / denom

    def compute_avg_rp_at_k(records, k=5):
        """Average rank-precision@k over ``records``.

        Args:
            records: Dicts with a ``pmid`` and a ``ranked_reaction_labels`` entry.
            k: Cutoff passed to ``rank_precision_at_k``.

        Returns:
            ``(average, bad)`` where ``bad`` counts records whose scoring raised.
            Failed records contribute 0 to the sum but still count in the
            denominator. Returns ``(0.0, 0)`` when ``records`` is empty.
        """
        # nothing to score: avoid dividing by zero below
        if not records:
            return 0.0, 0

        total_rp_at_k, bad = 0, 0
        for record in records:
            pmid = record['pmid']
            preds = record['ranked_reaction_labels']
            targets = label_fields_to_values[pmid]['ranked_reaction_labels']
            try:
                total_rp_at_k += rank_precision_at_k(preds, targets, k)
            except Exception:
                print(f"Error computing rank precision at k for record with pmid {pmid}")
                bad += 1

        return total_rp_at_k / len(records), bad

    # average rp@5 over the exported records, plus the number of records that failed to score
    rp_at_k, failed = compute_avg_rp_at_k(record_jsons, k=5)
    # the first key in plan_stats is taken to be the final plan
    # NOTE(review): relies on plan_stats ordering — confirm against ExecutionStats
    final_plan_id = list(data_record_collection.execution_stats.plan_stats.keys())[0]
    final_plan_str = data_record_collection.execution_stats.plan_strs[final_plan_id]
    # summary metrics persisted alongside the records and profiling files
    stats_dict = {
        "rp@5": rp_at_k,
        "failed": failed,
        "optimization_time": data_record_collection.execution_stats.optimization_time,
        "optimization_cost": data_record_collection.execution_stats.optimization_cost,
        "plan_execution_time": data_record_collection.execution_stats.plan_execution_time,
        "plan_execution_cost": data_record_collection.execution_stats.plan_execution_cost,
        "total_execution_time": data_record_collection.execution_stats.total_execution_time,
        "total_execution_cost": data_record_collection.execution_stats.total_execution_cost,
        "plan_str": final_plan_str,
    }
    with open(f"max-quality-at-cost-data/{exp_name}-metrics.json", "w") as f:
        json.dump(stats_dict, f)

    # echo the same metrics to stdout
    print(f"rp@k: {rp_at_k:.5f}")
    print(f"failed: {failed}")
    print(f"Optimization time: {data_record_collection.execution_stats.optimization_time}")
    print(f"Optimization cost: {data_record_collection.execution_stats.optimization_cost}")
    print(f"Plan Exec. time: {data_record_collection.execution_stats.plan_execution_time}")
    print(f"Plan Exec. cost: {data_record_collection.execution_stats.plan_execution_cost}")
    print(f"Total Execution time: {data_record_collection.execution_stats.total_execution_time}")
    print(f"Total Execution Cost: {data_record_collection.execution_stats.total_execution_cost}")


================================================
FILE: abacus-research/biodex-min-at-fixed-quality.py
================================================
import argparse
import json
import os
import time

import chromadb
import datasets
from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction

# from ragatouille import RAGPretrainedModel
import palimpzest as pz
from palimpzest.constants import Model

# Column specs (name / type / description) for the fields read from each BioDEX
# paper; passed as the dataset schema by BiodexDataset below.
biodex_entry_cols = [
    {"name": "pmid", "type": str, "desc": "The PubMed ID of the medical paper"},
    {"name": "title", "type": str, "desc": "The title of the medical paper"},
    {"name": "abstract", "type": str, "desc": "The abstract of the medical paper"},
    {"name": "fulltext", "type": str, "desc": "The full text of the medical paper, which contains information relevant for creating a drug safety report."},
]

# Output column of the first `sem_map` step of the plan: free-text reactions.
biodex_reactions_cols = [
    {"name": "reactions", "type": list[str], "desc": "The list of all medical conditions experienced by the patient as discussed in the report. Try to provide as many relevant medical conditions as possible."},
]

# Output column of the `sem_topk` retrieval step of the plan.
biodex_reaction_labels_cols = [
    {"name": "reaction_labels", "type": list[str], "desc": "Official terms for medical conditions listed in `reactions`"},
]

# Output column of the final `sem_map` step; this is the field scored with rank-precision@k.
biodex_ranked_reactions_labels_cols = [
    {"name": "ranked_reaction_labels", "type": list[str], "desc": "The ranked list of medical conditions experienced by the patient. The most relevant label occurs first in the list. Be sure to rank ALL of the inputs."},
]

class BiodexValidator(pz.Validator):
    """Score BioDEX pipeline outputs against ground-truth reaction lists.

    Ground truth is read from the ``train`` split of ``BioDEX/BioDEX-Reactions``.
    Each entry's comma-separated ``reactions`` string is normalized into a list
    of labels and stored under the stringified ``pmid`` of the entry.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load the labelled entries and build the pmid --> label lookup.

        Args:
            rp_at_k: Cutoff ``k`` used by :meth:`rank_precision_at_k`.
            num_samples: Number of training entries to keep.
            shuffle: If True, draw a seeded random sample of ``num_samples``
                entries; otherwise keep the first ``num_samples`` entries.
            seed: Random state used for sampling when ``shuffle`` is True.
        """
        super().__init__()

        # read dataset and prepare entries
        dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="train").to_pandas()
        if shuffle:
            dataset = dataset.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        else:
            dataset = dataset.to_dict(orient="records")[:num_samples]

        # compute mapping from pmid --> label (i.e. reactions list)
        self.pmid_to_label = self._compute_pmid_to_label(dataset)

        # store rp_at_k for computing rank-precision at k metric
        self.k = rp_at_k

    @staticmethod
    def _normalize(text: str) -> str:
        """Lower-case ``text`` and drop apostrophes and carets (whitespace is untouched)."""
        return text.lower().replace("'", "").replace("^", "")

    def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
        """Map each entry's stringified pmid to its normalized list of reactions."""
        return {
            str(entry["pmid"]): [
                self._normalize(reaction.strip())
                for reaction in entry["reactions"].split(",")
            ]
            for entry in dataset
        }

    def rank_precision_at_k(self, preds: list | None, targets: list):
        """Return rank-precision@k of ``preds`` against ``targets``.

        Counts how many of the first ``self.k`` predictions appear in the target
        set and divides by ``min(self.k, number of distinct targets)``.

        Returns 0.0 when ``preds`` is None. Any exception raised while scoring
        (e.g. non-string predictions, or an empty target list) is treated as a
        zero score and the offending predictions are dumped under ``rp@k-errors/``.
        """
        if preds is None:
            return 0.0

        try:
            # normalize each list
            preds = [self._normalize(pred.strip()) for pred in preds]
            targets = {self._normalize(target.strip()) for target in targets}

            # compute rank-precision at k
            denom = min(self.k, len(targets))
            hits = sum(pred in targets for pred in preds[: self.k])

            return hits / denom

        except Exception:
            # best-effort scoring: record the bad predictions and score them as 0
            os.makedirs("rp@k-errors", exist_ok=True)
            ts = time.time()
            with open(f"rp@k-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def term_recall(self, preds: list | None, targets: list):
        """Return the fraction of distinct target terms that occur in ``preds``.

        Both lists are normalized and split on single spaces into terms. The
        denominator is the number of *distinct* target terms, so predicting every
        target term yields exactly 1.0 even when targets share words.

        Returns 0.0 when ``preds`` is None. Any exception raised while scoring
        (e.g. an empty target list) is treated as a zero score and the offending
        predictions are dumped under ``term-recall-eval-errors/``.
        """
        if preds is None:
            return 0.0

        try:
            # normalize terms in each list
            pred_terms = {
                term.strip()
                for pred in preds
                for term in self._normalize(pred).split(" ")
            }
            # a set (not a list): the intersection below is de-duplicated, so the
            # denominator must be de-duplicated as well
            target_terms = {
                term.strip()
                for target in targets
                for term in self._normalize(target).split(" ")
            }

            # compute term recall and return
            return len(pred_terms & target_terms) / len(target_terms)

        except Exception:
            # best-effort scoring: record the bad predictions and score them as 0
            os.makedirs("term-recall-eval-errors", exist_ok=True)
            ts = time.time()
            with open(f"term-recall-eval-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def map_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a map output: term recall for ``reactions``, rp@k for ``ranked_reaction_labels``.

        Only ``fields[0]`` is inspected.

        Raises:
            NotImplementedError: if ``fields[0]`` is any other field.
            KeyError: if the record's pmid has no ground-truth label.
        """
        field_name = fields[0]
        if field_name not in ("reactions", "ranked_reaction_labels"):
            raise NotImplementedError(f"Validator.map_score_fn not implemented for field {field_name}.")

        preds = output.get(field_name)
        targets = self.pmid_to_label[str(input_record["pmid"])]
        if field_name == "reactions":
            return self.term_recall(preds, targets)
        return self.rank_precision_at_k(preds, targets)

    def topk_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a top-k output: term recall for ``reaction_labels``.

        Only ``fields[0]`` is inspected.

        Raises:
            NotImplementedError: if ``fields[0]`` is any other field.
            KeyError: if the record's pmid has no ground-truth label.
        """
        field_name = fields[0]
        if field_name != "reaction_labels":
            raise NotImplementedError(f"Validator.topk_score_fn not implemented for field {field_name}.")

        preds = output.get(field_name)
        # keys of pmid_to_label are stringified pmids, so coerce here as map_score_fn does
        targets = self.pmid_to_label[str(input_record["pmid"])]
        return self.term_recall(preds, targets)


class BiodexDataset(pz.IterDataset):
    """Iterable dataset over a slice of the ``BioDEX/BioDEX-Reactions`` corpus.

    Items are flat dicts holding the ``pmid``, ``title``, ``abstract`` and
    ``fulltext`` of one paper, matching ``biodex_entry_cols``.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        split: str = "test",
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load ``split`` and keep ``num_samples`` entries.

        With ``shuffle`` the entries are a seeded random sample; otherwise they
        are the first ``num_samples`` rows. All constructor arguments are also
        stored on the instance.
        """
        super().__init__(id="biodex", schema=biodex_entry_cols)

        # load the requested split as a DataFrame, then reduce it to a list of row dicts
        frame = datasets.load_dataset("BioDEX/BioDEX-Reactions", split=split).to_pandas()
        if not shuffle:
            rows = frame.to_dict(orient="records")[:num_samples]
        else:
            rows = frame.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        self.dataset = rows

        # remember how this dataset was configured
        self.rp_at_k = rp_at_k
        self.num_samples = num_samples
        self.shuffle = shuffle
        self.seed = seed
        self.split = split

    def __len__(self):
        """Return the number of retained entries."""
        return len(self.dataset)

    def __getitem__(self, idx: int):
        """Return entry ``idx`` as a dict of its four input fields."""
        row = self.dataset[idx]

        # copy out exactly the schema fields, in schema order
        return {
            "pmid": row["pmid"],
            "title": row["title"],
            "abstract": row["abstract"],
            "fulltext": row["fulltext"],
        }


if __name__ == "__main__":
    # Script entry point. All options have defaults except that --metric must be
    # given explicitly (its default of None is rejected right after parsing).
    # parse arguments
    parser = argparse.ArgumentParser(description="Run a simple demo")
    parser.add_argument("--verbose", default=False, action="store_true", help="Print verbose output")
    parser.add_argument("--progress", default=False, action="store_true", help="Print progress output")
    parser.add_argument(
        "--execution-strategy",
        default="parallel",
        type=str,
        help="The plan executor to use. One of sequential, pipelined, parallel",
    )
    parser.add_argument(
        "--sentinel-execution-strategy",
        default="mab",
        type=str,
        help="The sentinel execution strategy to use. One of mab or random",
    )
    parser.add_argument(
        "--optimizer-strategy",
        default="pareto",
        type=str,
        help="The optimizer to use. One of pareto or greedy",
    )
    # also used as the sample size for both the validator and the training dataset
    parser.add_argument(
        "--val-examples",
        default=30,
        type=int,
        help="Number of validation examples to sample from",
    )
    # also used as the pandas random_state when sampling dataset rows
    parser.add_argument(
        "--seed",
        default=42,
        type=int,
        help="Seed used to initialize RNG for MAB sampling algorithm",
    )
    parser.add_argument(
        "--k",
        default=10,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    # NOTE(review): this help text is identical to --k's and looks copy-pasted;
    # confirm what `j` controls in QueryProcessorConfig and update the text.
    parser.add_argument(
        "--j",
        default=3,
        type=int,
        help="Number of columns to sample in Random Sampling or MAB sentinel execution",
    )
    parser.add_argument(
        "--sample-budget",
        default=100,
        type=int,
        help="Total sample budget in Random Sampling or MAB sentinel execution",
    )
    # must be "cost" or "latency"; selects the policy and the output directory name
    parser.add_argument(
        "--metric",
        default=None,
        type=str,
        help="whether to minimize latency or cost",
    )
    # when omitted, a name is derived from the metric, strategy, k, j, budget and seed
    parser.add_argument(
        "--exp-name",
        default=None,
        type=str,
        help="The experiment name.",
    )
    # path to a JSON file; its parsed content is forwarded as `priors` to the config
    parser.add_argument(
        "--priors-file",
        default=None,
        type=str,
        help="A file with a dictionary mapping physical operator ids to prior belief on their performance",
    )

    args = parser.parse_args()

    # NOTE(review): this validation is an `assert`, so it is skipped when Python runs
    # with -O; an invalid or missing --metric would then fall through to the
    # "latency" branch of the policy selection further down.
    assert args.metric in ["cost", "latency"], "metric must be one of cost or latency"
    metric = args.metric

    # create directory for profiling data
    os.makedirs(f"min-{metric}-at-quality-data", exist_ok=True)

    # unpack CLI arguments into locals used throughout the rest of the script
    verbose = args.verbose
    progress = args.progress
    seed = args.seed
    val_examples = args.val_examples
    k = args.k
    j = args.j
    sample_budget = args.sample_budget
    execution_strategy = args.execution_strategy
    sentinel_execution_strategy = args.sentinel_execution_strategy
    optimizer_strategy = args.optimizer_strategy
    # the experiment name prefixes every output file written below
    exp_name = (
        f"biodex-min-{metric}-strategy-{optimizer_strategy}-k{k}-j{j}-budget{sample_budget}-seed{seed}"
        if args.exp_name is None
        else args.exp_name
    )
    # optional priors, parsed from JSON and passed through to the config unchanged
    priors = None
    if args.priors_file is not None:
        with open(args.priors_file) as f:
            priors = json.load(f)
    print(f"EXPERIMENT NAME: {exp_name}")

    # only warns when ALL three keys are missing
    # NOTE(review): OPENAI_API_KEY is read unconditionally via os.environ[...] when the
    # embedding function is built below, so the script raises KeyError if it is unset.
    if os.getenv("OPENAI_API_KEY") is None and os.getenv("TOGETHER_API_KEY") is None and os.getenv("ANTHROPIC_API_KEY") is None:
        print("WARNING: OPENAI_API_KEY, TOGETHER_API_KEY, and ANTHROPIC_API_KEY are unset")

    # create validator
    validator = BiodexValidator(
        rp_at_k=5,
        num_samples=val_examples,
        shuffle=True,
        seed=seed,
    )

    # create validation data source
    # (same split, sample size and seed as the validator — presumably so that both
    # select the same training rows; verify if the sampling logic changes)
    train_dataset = BiodexDataset(
        split="train",
        num_samples=val_examples,
        shuffle=True,
        seed=seed,
    )
    # rebind to a dict keyed by the dataset id, the form passed to optimize_and_run below
    train_dataset = {train_dataset.id: train_dataset}

    # load index [text-embedding-3-small]
    chroma_client = chromadb.PersistentClient(".chroma-biodex")
    openai_ef = OpenAIEmbeddingFunction(
        api_key=os.environ["OPENAI_API_KEY"],
        model_name="text-embedding-3-small",
    )
    # get_collection (not get_or_create): the collection is expected to exist already
    index = chroma_client.get_collection("biodex-reaction-terms", embedding_function=openai_ef)

    def search_func(index: chromadb.Collection, query: list[list[float]], k: int) -> dict[str, list[str]]:
        """Retrieve the ``k`` most similar, de-duplicated reaction terms for ``query``.

        Args:
            index: Collection to search.
            query: One embedding per query; each retrieves up to 5 documents.
            k: Maximum number of terms to return.

        Returns:
            ``{"reaction_labels": [...]}`` with terms ordered by decreasing
            similarity across all queries.
        """
        # execute query with embeddings (a fixed 5 results per query embedding)
        results = index.query(query, n_results=5)

        # flatten the per-query results into (term, similarity) pairs; similarity is
        # taken as 1 - distance (assumes the collection uses cosine distance — TODO confirm)
        scored_terms = [
            (doc, 1 - dist)
            for query_docs, query_distances in zip(results["documents"], results["distances"])
            for doc, dist in zip(query_docs, query_distances)
        ]

        # sort the results by similarity score (stable, highest first)
        scored_terms.sort(key=lambda pair: pair[1], reverse=True)

        # remove duplicates, keeping the first (i.e. most similar) occurrence of each term
        unique_terms = list(dict.fromkeys(doc for doc, _ in scored_terms))

        # return the top-k similar results
        return {"reaction_labels": unique_terms[:k]}

    # construct plan
    # NOTE: num_samples=250 and the seed must stay in sync with the test-set sample
    # drawn for scoring at the end of this section (n=250, random_state=seed).
    plan = BiodexDataset(split="test", num_samples=250, shuffle=True, seed=seed)
    # step 1: extract free-text reactions from each paper
    plan = plan.sem_map(biodex_reactions_cols)
    # step 2: look up candidate official terms for the extracted reactions in the index
    plan = plan.sem_topk(
        index=index,
        search_func=search_func,
        search_attr="reactions",
        output_attrs=biodex_reaction_labels_cols,
    )
    # step 3: rank the candidate labels using the paper text
    plan = plan.sem_map(biodex_ranked_reactions_labels_cols, depends_on=["title", "abstract", "fulltext", "reaction_labels"])

    # set policy
    # both policies share the same hard-coded quality floor of 0.216; any metric other
    # than "cost" selects the time-minimizing policy
    policy = pz.MinCostAtFixedQuality(min_quality=0.216) if metric == "cost" else pz.MinTimeAtFixedQuality(min_quality=0.216)

    # execute pz plan
    config = pz.QueryProcessorConfig(
        policy=policy,
        optimizer_strategy=optimizer_strategy,
        sentinel_execution_strategy=sentinel_execution_strategy,
        execution_strategy=execution_strategy,
        use_final_op_quality=True,
        max_workers=64,
        verbose=verbose,
        # this script restricts the optimizer to a single model
        available_models=[
            Model.GPT_4o_MINI,
        ],
        allow_bonded_query=True,
        allow_critic=True,
        allow_mixtures=True,
        allow_rag_reduction=True,
        progress=progress,
        k=k,
        j=j,
        sample_budget=sample_budget,
        seed=seed,
        exp_name=exp_name,
        priors=priors,
    )

    data_record_collection = plan.optimize_and_run(config=config, train_dataset=train_dataset, validator=validator)

    # dump the full output table (to_df() is evaluated twice: once to print, once to save)
    print(data_record_collection.to_df())
    data_record_collection.to_df().to_csv(f"min-{metric}-at-quality-data/{exp_name}-output.csv", index=False)

    # create filepaths for records and stats
    records_path = f"min-{metric}-at-quality-data/{exp_name}-records.json"
    stats_path = f"min-{metric}-at-quality-data/{exp_name}-profiling.json"

    # save record outputs
    record_jsons = []
    for record in data_record_collection:
        record_dict = record.to_dict()
        # keep only the fields needed for scoring; the comprehension's `k` is local to
        # the comprehension and does not overwrite the CLI `k` above
        record_dict = {
            k: v
            for k, v in record_dict.items()
            if k in ["pmid", "reactions", "reaction_labels", "ranked_reaction_labels"]
        }
        record_jsons.append(record_dict)

    with open(records_path, "w") as f:
        json.dump(record_jsons, f)

    # save statistics
    execution_stats_dict = data_record_collection.execution_stats.to_json()
    with open(stats_path, "w") as f:
        json.dump(execution_stats_dict, f)

    # score output
    # re-draw the test sample with the same n and seed that were used to build the plan's dataset
    test_dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="test").to_pandas()
    test_dataset = test_dataset.sample(n=250, random_state=seed).to_dict(orient="records")

    # construct mapping from pmid --> label (field, value) pairs
    def compute_target_record(entry):
        """Return ``{"ranked_reaction_labels": [...]}`` for one dataset entry.

        Labels come from the entry's comma-separated ``reactions`` string; each is
        stripped, lower-cased, and has apostrophes and carets removed.
        """
        def _clean(raw_label):
            # same normalization order as the original: strip, lower, then drop ' and ^
            return raw_label.strip().lower().replace("'", "").replace("^", "")

        pieces = entry["reactions"].split(",")
        return {"ranked_reaction_labels": list(map(_clean, pieces))}

    # Ground-truth lookup keyed by each sampled test entry's raw `pmid` value (not
    # coerced to str); values are the dicts built by compute_target_record.
    label_fields_to_values = {
        entry["pmid"]: compute_target_record(entry) for entry in test_dataset
    }

    def rank_precision_at_k(preds: list, targets: list, k: int):
        """Return rank-precision@k of ``preds`` against ``targets``.

        Counts how many of the first ``k`` predictions appear in the target set and
        divides by ``min(k, number of distinct targets)``. Predictions and targets
        are stripped, lower-cased and cleared of apostrophes and carets before
        comparison, so surrounding whitespace in a prediction does not cause a miss.

        Returns 0.0 when ``preds`` is None.

        Raises:
            ZeroDivisionError: if ``targets`` is empty (or ``k`` is 0).
            AttributeError: if a prediction or target is not a string.
        """
        if preds is None:
            return 0.0

        # normalize each list (targets become a set for O(1) membership tests)
        preds = [pred.strip().lower().replace("'", "").replace("^", "") for pred in preds]
        targets = {target.strip().lower().replace("'", "").replace("^", "") for target in targets}

        # compute rank-precision at k
        denom = min(k, len(targets))
        hits = sum(pred in targets for pred in preds[:k])

        return hits / denom

    def compute_avg_rp_at_k(records, k=5):
        """Average rank-precision@k over ``records``.

        Args:
            records: Dicts with a ``pmid`` and a ``ranked_reaction_labels`` entry.
            k: Cutoff passed to ``rank_precision_at_k``.

        Returns:
            ``(average, bad)`` where ``bad`` counts records whose scoring raised.
            Failed records contribute 0 to the sum but still count in the
            denominator. Returns ``(0.0, 0)`` when ``records`` is empty.
        """
        # nothing to score: avoid dividing by zero below
        if not records:
            return 0.0, 0

        total_rp_at_k, bad = 0, 0
        for record in records:
            pmid = record['pmid']
            preds = record['ranked_reaction_labels']
            targets = label_fields_to_values[pmid]['ranked_reaction_labels']
            try:
                total_rp_at_k += rank_precision_at_k(preds, targets, k)
            except Exception:
                print(f"Error computing rank precision at k for record with pmid {pmid}")
                bad += 1

        return total_rp_at_k / len(records), bad

    # average rp@5 over the exported records, plus the number of records that failed to score
    rp_at_k, failed = compute_avg_rp_at_k(record_jsons, k=5)
    # the first key in plan_stats is taken to be the final plan
    # NOTE(review): relies on plan_stats ordering — confirm against ExecutionStats
    final_plan_id = list(data_record_collection.execution_stats.plan_stats.keys())[0]
    final_plan_str = data_record_collection.execution_stats.plan_strs[final_plan_id]
    # summary metrics persisted alongside the records and profiling files
    stats_dict = {
        "rp@5": rp_at_k,
        "failed": failed,
        "optimization_time": data_record_collection.execution_stats.optimization_time,
        "optimization_cost": data_record_collection.execution_stats.optimization_cost,
        "plan_execution_time": data_record_collection.execution_stats.plan_execution_time,
        "plan_execution_cost": data_record_collection.execution_stats.plan_execution_cost,
        "total_execution_time": data_record_collection.execution_stats.total_execution_time,
        "total_execution_cost": data_record_collection.execution_stats.total_execution_cost,
        "plan_str": final_plan_str,
    }
    with open(f"min-{metric}-at-quality-data/{exp_name}-metrics.json", "w") as f:
        json.dump(stats_dict, f)

    # echo the same metrics to stdout
    print(f"rp@k: {rp_at_k:.5f}")
    print(f"failed: {failed}")
    print(f"Optimization time: {data_record_collection.execution_stats.optimization_time}")
    print(f"Optimization cost: {data_record_collection.execution_stats.optimization_cost}")
    print(f"Plan Exec. time: {data_record_collection.execution_stats.plan_execution_time}")
    print(f"Plan Exec. cost: {data_record_collection.execution_stats.plan_execution_cost}")
    print(f"Total Execution time: {data_record_collection.execution_stats.total_execution_time}")
    print(f"Total Execution Cost: {data_record_collection.execution_stats.total_execution_cost}")


================================================
FILE: abacus-research/biodex-pareto-cascades.py
================================================
import argparse
import json
import os
import time

import chromadb
import datasets
from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction

import palimpzest as pz
from palimpzest.constants import Model
from palimpzest.policy import MaxQualityAtFixedCost

# Input schema: the fields exposed for every BioDEX entry (see BiodexDataset).
biodex_entry_cols = [
    {
        "name": "pmid",
        "type": str,
        "desc": "The PubMed ID of the medical paper",
    },
    {
        "name": "title",
        "type": str,
        "desc": "The title of the medical paper",
    },
    {
        "name": "abstract",
        "type": str,
        "desc": "The abstract of the medical paper",
    },
    {
        "name": "fulltext",
        "type": str,
        "desc": "The full text of the medical paper, which contains information relevant for creating a drug safety report.",
    },
]

# Output of the first semantic map: free-text reactions extracted from the paper.
biodex_reactions_cols = [
    {
        "name": "reactions",
        "type": list[str],
        "desc": "The list of all medical conditions experienced by the patient as discussed in the report. Try to provide as many relevant medical conditions as possible.",
    },
]

# Output of the top-k retrieval step: official terms matched to the extracted reactions.
biodex_reaction_labels_cols = [
    {
        "name": "reaction_labels",
        "type": list[str],
        "desc": "Official terms for medical conditions listed in `reactions`",
    },
]

# Output of the final semantic map: the retrieved labels, ranked by relevance.
biodex_ranked_reactions_labels_cols = [
    {
        "name": "ranked_reaction_labels",
        "type": list[str],
        "desc": "The ranked list of medical conditions experienced by the patient. The most relevant label occurs first in the list. Be sure to rank ALL of the inputs.",
    },
]

class BiodexValidator(pz.Validator):
    """Scores BioDEX operator outputs against ground-truth reaction labels.

    Labels are read from the train split of `BioDEX/BioDEX-Reactions` and stored in
    `self.pmid_to_label`, keyed by the string form of each entry's pmid.
    """

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load `num_samples` train entries and build the pmid --> label mapping.

        Args:
            rp_at_k: The k used by `rank_precision_at_k`.
            num_samples: Number of train entries to keep.
            shuffle: If True, draw a random sample of the entries (seeded by `seed`);
                otherwise keep the first `num_samples` entries.
            seed: Random state used for the sample when `shuffle` is True.
        """
        super().__init__()

        # read dataset and prepare entries
        dataset = datasets.load_dataset("BioDEX/BioDEX-Reactions", split="train").to_pandas()
        if shuffle:
            dataset = dataset.sample(n=num_samples, random_state=seed).to_dict(orient="records")
        else:
            dataset = dataset.to_dict(orient="records")[:num_samples]

        # compute mapping from pmid --> label (i.e. reactions list)
        self.pmid_to_label = self._compute_pmid_to_label(dataset)

        # store rp_at_k for computing rank-precision at k metric
        self.k = rp_at_k

    def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
        """Map each entry's pmid (as a string) to its normalized list of reactions.

        The comma-separated `reactions` string of every entry is split, and each
        reaction is stripped, lower-cased, and has `'` and `^` characters removed.
        """
        pmid_to_label = {}
        for entry in dataset:
            pmid = str(entry["pmid"])
            reactions_lst = [
                reaction.strip().lower().replace("'", "").replace("^", "")
                for reaction in entry["reactions"].split(",")
            ]
            pmid_to_label[pmid] = reactions_lst

        return pmid_to_label

    def rank_precision_at_k(self, preds: list | None, targets: list):
        """Compute rank-precision at `self.k` of the ranked `preds` against `targets`.

        The score is the number of the first `self.k` predictions that appear in
        `targets`, divided by min(self.k, number of distinct targets).

        Returns:
            The score, or 0.0 when `preds` is None or when scoring raises (e.g. a
            non-string prediction or an empty target list). In the failure case the
            offending predictions are also written to a file under `rp@k-errors/`.
        """
        if preds is None:
            return 0.0

        try:
            # normalize each list the same way the labels were normalized
            preds = [pred.strip().lower().replace("'", "").replace("^", "") for pred in preds]
            targets = {target.strip().lower().replace("'", "").replace("^", "") for target in targets}

            # compute rank-precision at k; only the first k predictions are considered
            denom = min(self.k, len(targets))
            total = sum(pred in targets for pred in preds[: self.k])

            return total / denom

        except Exception:
            # best-effort scoring: record the predictions that could not be scored and score them 0
            os.makedirs("rp@k-errors", exist_ok=True)
            ts = time.time()
            with open(f"rp@k-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def term_recall(self, preds: list | None, targets: list):
        """Compute the fraction of distinct target words that occur among the predicted words.

        Both lists are lower-cased, stripped of `'` and `^`, and split on spaces into
        individual words before comparison.

        Returns:
            The recall, or 0.0 when `preds` is None or when scoring raises (e.g. a
            non-string prediction or no target words). In the failure case the
            offending predictions are also written to a file under
            `term-recall-eval-errors/`.
        """
        if preds is None:
            return 0.0

        try:
            # normalize terms in each list
            pred_terms = {
                term.strip()
                for pred in preds
                for term in pred.lower().replace("'", "").replace("^", "").split(" ")
            }
            # NOTE: this must be a set; with a list, words repeated across labels would
            # inflate the denominator and a perfect prediction would score below 1.0
            target_terms = {
                term.strip()
                for target in targets
                for term in target.lower().replace("'", "").replace("^", "").split(" ")
            }

            # compute term recall and return
            intersect = pred_terms.intersection(target_terms)
            term_recall = len(intersect) / len(target_terms)

            return term_recall

        except Exception:
            # best-effort scoring: record the predictions that could not be scored and score them 0
            os.makedirs("term-recall-eval-errors", exist_ok=True)
            ts = time.time()
            with open(f"term-recall-eval-errors/error-{ts}.txt", "w") as f:
                f.write(str(preds))
            return 0.0

    def map_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a map output; only the first entry of `fields` is inspected.

        `reactions` is scored with term recall and `ranked_reaction_labels` with
        rank-precision at k, both against the label of the input record's pmid.

        Raises:
            NotImplementedError: If the first field is neither of the two above.
            KeyError: If the record's pmid has no label in `self.pmid_to_label`.
        """
        field_name = fields[0]
        if field_name == "reactions":
            preds = output.get(field_name)
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.term_recall(preds, targets)
        elif field_name == "ranked_reaction_labels":
            preds = output.get(field_name)
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.rank_precision_at_k(preds, targets)
        else:
            raise NotImplementedError(f"Validator.map_score_fn not implemented for field {field_name}.")

    def topk_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """Score a top-k output; only the first entry of `fields` is inspected.

        `reaction_labels` is scored with term recall against the label of the input
        record's pmid.

        Raises:
            NotImplementedError: If the first field is not `reaction_labels`.
            KeyError: If the record's pmid has no label in `self.pmid_to_label`.
        """
        field_name = fields[0]
        if field_name == "reaction_labels":
            preds = output.get(field_name)
            # keys of pmid_to_label are strings, so convert the pmid like map_score_fn does
            targets = self.pmid_to_label[str(input_record["pmid"])]
            return self.term_recall(preds, targets)
        else:
            raise NotImplementedError(f"Validator.topk_score_fn not implemented for field {field_name}.")


class BiodexDataset(pz.IterDataset):
    """Dataset of `BioDEX/BioDEX-Reactions` entries using the `biodex_entry_cols` schema."""

    def __init__(
        self,
        rp_at_k: int = 5,
        num_samples: int = 5,
        split: str = "test",
        shuffle: bool = False,
        seed: int = 42,
    ):
        """Load `num_samples` entries from the requested split.

        When `shuffle` is True the entries are a random sample seeded by `seed`;
        otherwise they are the first `num_samples` entries of the split.
        """
        super().__init__(id="biodex", schema=biodex_entry_cols)

        # materialize the split as a list of per-entry dicts
        frame = datasets.load_dataset("BioDEX/BioDEX-Reactions", split=split).to_pandas()
        if not shuffle:
            self.dataset = frame.to_dict(orient="records")[:num_samples]
        else:
            self.dataset = frame.sample(n=num_samples, random_state=seed).to_dict(orient="records")

        # keep the construction parameters on the instance
        self.rp_at_k = rp_at_k
        self.num_samples = num_samples
        self.shuffle = shuffle
        self.seed = seed
        self.split = split

    def __len__(self):
        """Return the number of loaded entries."""
        return len(self.dataset)

    def __getitem__(self, idx: int):
        """Return the item for entry `idx`: its input fields plus empty `labels` and `score_fn` dicts."""
        entry = self.dataset[idx]

        # copy over exactly the input fields, in schema order
        fields = {name: entry[name] for name in ("pmid", "title", "abstract", "fulltext")}

        return {"fields": fields, "labels": {}, "score_fn": {}}


if __name__ == "__main__":
    # build the command-line interface
    parser = argparse.ArgumentParser(description="Run a simple demo")

    # boolean switches, all off by default
    # NOTE: --constrained is accepted but never read elsewhere in this script
    for flag, help_text in (
        ("--verbose", "Print verbose output"),
        ("--progress", "Print progress output"),
        ("--constrained", "Use constrained objective"),
    ):
        parser.add_argument(flag, default=False, action="store_true", help=help_text)

    # valued options as (flag, default, type, help); registered in this order
    for flag, default, arg_type, help_text in (
        ("--execution-strategy", "parallel", str, "The plan executor to use. One of sequential, pipelined, parallel"),
        ("--sentinel-execution-strategy", "mab", str, "The sentinel execution strategy to use. One of mab or random"),
        ("--optimizer-strategy", "pareto", str, "The optimizer to use. One of pareto or greedy"),
        ("--val-examples", 30, int, "Number of validation examples to sample from"),
        ("--seed", 42, int, "Seed used to initialize RNG for MAB sampling algorithm"),
        ("--k", 10, int, "Number of columns to sample in Random Sampling or MAB sentinel execution"),
        ("--j", 3, int, "Number of columns to sample in Random Sampling or MAB sentinel execution"),
        ("--sample-budget", 100, int, "Total sample budget in Random Sampling or MAB sentinel execution"),
        ("--cost", 1.0, float, "The cost budget for the optimization"),
        ("--exp-name", None, str, "The experiment name."),
        (
            "--priors-file",
            None,
            str,
            "A file with a dictionary mapping physical operator ids to prior belief on their performance",
        ),
    ):
        parser.add_argument(flag, default=default, type=arg_type, help=help_text)

    args = parser.parse_args()

    # make sure the output directory for this experiment's artifacts exists
    os.makedirs("pareto-cascades-data", exist_ok=True)

    # unpack the parsed options into the locals used by the rest of the script
    verbose, progress, seed = args.verbose, args.progress, args.seed
    val_examples = args.val_examples
    k, j, sample_budget = args.k, args.j, args.sample_budget
    execution_strategy = args.execution_strategy
    sentinel_execution_strategy = args.sentinel_execution_strategy
    optimizer_strategy = args.optimizer_strategy
    cost = args.cost

    # without an explicit name, derive one from the run's settings
    exp_name = args.exp_name
    if exp_name is None:
        exp_name = f"biodex-strategy-{optimizer_strategy}-k{k}-j{j}-budget{sample_budget}-seed{seed}"

    # optionally load the priors dictionary from the given JSON file
    priors = None
    if args.priors_file is not None:
        with open(args.priors_file) as f:
            priors = json.load(f)
    print(f"EXPERIMENT NAME: {exp_name}")

    # warn only when none of the supported API keys is present
    api_keys = ("OPENAI_API_KEY", "TOGETHER_API_KEY", "ANTHROPIC_API_KEY")
    if all(os.getenv(key) is None for key in api_keys):
        print("WARNING: OPENAI_API_KEY, TOGETHER_API_KEY, and ANTHROPIC_API_KEY are unset")

    # validator holding labels for `val_examples` sampled train entries
    validator = BiodexValidator(rp_at_k=5, num_samples=val_examples, shuffle=True, seed=seed)

    # validation data source (same split, sample size and seed as the validator), keyed by dataset id
    train_dataset = BiodexDataset(split="train", num_samples=val_examples, shuffle=True, seed=seed)
    train_dataset = {train_dataset.id: train_dataset}

    # open the persisted Chroma collection of reaction terms [text-embedding-3-small]
    # NOTE: OPENAI_API_KEY must be set here; a missing key raises KeyError
    chroma_client = chromadb.PersistentClient(".chroma-biodex")
    openai_ef = OpenAIEmbeddingFunction(api_key=os.environ["OPENAI_API_KEY"], model_name="text-embedding-3-small")
    index = chroma_client.get_collection("biodex-reaction-terms", embedding_function=openai_ef)

    def search_func(index: chromadb.Collection, query: list[list[float]], k: int) -> dict[str, list[str]]:
        """Return the `k` most similar, de-duplicated terms for the given query embeddings.

        Args:
            index: Collection to search.
            query: Query embeddings passed to `index.query`.
            k: Maximum number of distinct terms to return.

        Returns:
            A dict with the single key `reaction_labels`, mapping to the distinct result
            terms ordered from most to least similar and truncated to `k` entries.
        """
        # execute query with embeddings
        # NOTE: every query embedding retrieves a fixed 5 neighbours, independent of `k`
        results = index.query(query, n_results=5)

        # pair every returned term with its similarity score, across all queries
        # (1 - distance; presumably the collection uses cosine distance -- verify against the index)
        scored_terms = [
            (1 - dist, doc)
            for query_docs, query_distances in zip(results["documents"], results["distances"])
            for doc, dist in zip(query_docs, query_distances)
        ]

        # most similar first; the sort is stable, so ties keep their retrieval order
        scored_terms.sort(key=lambda pair: pair[0], reverse=True)

        # remove duplicates, keeping the best-scoring occurrence of each term
        unique_terms = list(dict.fromkeys(doc for _, doc in scored_terms))

        # return the top-k similar results
        return {"reaction_labels": unique_terms[:k]}

    # construct plan: extract reactions -> retrieve matching official terms -> rank the retrieved terms
    plan = (
        BiodexDataset(split="test", num_samples=250, shuffle=True, seed=seed)
        .sem_map(biodex_reactions_cols)
        .sem_topk(
            index=index,
            search_func=search_func,
            search_attr="reactions",
            output_attrs=biodex_reaction_labels_cols,
        )
        .sem_map(
            biodex_ranked_reactions_labels_cols,
            depends_on=["title", "abstract", "fulltext", "reaction_labels"],
        )
    )

    # configure the query processor from the CLI options
    config = pz.QueryProcessorConfig(
        policy=MaxQualityAtFixedCost(max_cost=cost),
        optimizer_strategy=optimizer_strategy,
        sentinel_execution_strategy=sentinel_execution_strategy,
        execution_strategy=execution_strategy,
        use_final_op_quality=True,
        max_workers=64,
        verbose=verbose,
        available_models=[
            Model.GPT_4o_MINI,
            Model.LLAMA3_2_3B,
            Model.LLAMA3_1_8B,
            Model.LLAMA3_3_70B,
            # Model.MIXTRAL, # NOTE: only available in tag `abacus-paper-experiments`
            Model.DEEPSEEK_R1_DISTILL_QWEN_1_5B,
        ],
        allow_bonded_query=True,
        allow_critic=True,
        allow_mixtures=True,
        allow_rag_reduction=True,
        progress=progress,
        k=k,
        j=j,
        sample_budget=sample_budget,
        seed=seed,
        exp_name=exp_name,
        priors=priors,
    )

    # optimize and execute the plan, using the train entries and validator for optimization
    data_record_collection = plan.optimize_and_run(config=config, train_dataset=train_dataset, validator=validator)

    # show the output records and persist them as CSV
    print(data_record_collection.to_df())
    data_record_collection.to_df().to_csv(f"pareto-cascades-data/{exp_name}-output.csv", index=False)

    # create filepaths for records and stats
    records_path = f"pareto-cascades-data/{exp_name}-records.json"
    stats_path = f"pareto-cascades-data/{exp_name}-profiling.json"

    # keep only the pmid and the three generated fields of every output record
    record_jsons = [
        {
            k: v
            for k, v in record.to_dict().items()
            if k in ["pmid", "reactions", "reaction_labels", "ranked_reaction_labels"]
        }
        for record in data_record_collection
    ]

    # save record outputs
    with open(records_path, "w") as f:
        json.dump(record_jsons, f)

    # save statistics
    execution_stats_dict = data_record_collection.execution_stats.to_json()
    with open(stats_path, "w") as f:
        json.dump(execution_stats_dict, f)

    # score output: re-draw the 250 test entries with the same seed that was used to build the plan's dataset
    test_dataset = (
        datasets.load_dataset("BioDEX/BioDEX-Reactions", split="test")
        .to_pandas()
        .sample(n=250, random_state=seed)
        .to_dict(orient="records")
    )

    # construct mapping from pmid --> label (field, value) pairs
    def compute_target_record(entry):
        """Return the label dict for `entry`: its normalized reactions under `ranked_reaction_labels`.

        The comma-separated `reactions` string is split, and every reaction is stripped,
        lower-cased, and has `'` and `^` characters removed.
        """
        reactions_lst = []
        for reaction in entry["reactions"].split(","):
            cleaned = reaction.strip().lower()
            reactions_lst.append(cleaned.replace("'", "").replace("^", ""))

        return {"ranked_reaction_labels": reactions_lst}

    # pmid --> label dict for every sampled test entry
    label_fields_to_values = {}
    for entry in test_dataset:
        label_fields_to_values[entry["pmid"]] = compute_target_record(entry)

    def rank_precision_at_k(preds: list, targets: list, k: int):
        if preds is None:
            return 0.0

        # lower-case each list
        preds = [pred.lower().replace("'", "").replace("^", "") for pred in preds]
        targets = set([target.lower().replace("'", "").replace("^", "") for target in targets])

        # compute rank-precision at k
        rn = len(targets)
        denom = min(k, rn)
        total = 0.0
        for i in range(k):
            total += preds[i] in targets if i < len(preds) else 0.0

        return total / denom

    def compute_avg_rp_at_k(records, k=5):
        """Average rank-precision at `k` over `records`.

        Each record's `ranked_reaction_labels` is scored against the label stored for its
        pmid in `label_fields_to_values`. Records whose scoring raises are reported,
        counted as failures, and contribute 0 to the average (they still count in the
        denominator).

        Returns:
            A tuple `(average score, number of failed records)`; `(0.0, 0)` when
            `records` is empty.
        """
        # nothing to score: return a zero average instead of dividing by zero below
        if not records:
            return 0.0, 0

        total_rp_at_k, bad = 0, 0
        for record in records:
            pmid = record['pmid']
            preds = record['ranked_reaction_labels']
            targets = label_fields_to_values[pmid]['ranked_reaction_labels']
            try:
                total_rp_at_k += rank_precision_at_k(preds, targets, k)
            except Exception:
                print(f"Error computing rank precision at k for record with pmid {pmid}")
                bad += 1

        return total_rp_at_k / len(records), bad

    # average rank-precision@5 over the output records, plus the number of records that failed to score
    rp_at_k, failed = compute_avg_rp_at_k(record_jsons, k=5)

    # look up the string form of the first plan listed in the execution stats
    final_plan_id = list(data_record_collection.execution_stats.plan_stats.keys())[0]
    final_plan_str = data_record_collection.execution_stats.plan_strs[final_plan_id]

    # assemble the metrics summary; key order matches the order of insertion below
    stats_dict = {"rp@5": rp_at_k, "failed": failed}
    for stat_name in (
        "optimization_time",
        "optimization_cost",
        "plan_execution_time",
        "plan_execution_cost",
        "total_execution_time",
        "total_execution_cost",
    ):
        stats_dict[stat_name] = getattr(data_record_collection.execution_stats, stat_name)
    stats_dict["plan_str"] = final_plan_str

    # persist the summary next to the other experiment artifacts
    with open(f"pareto-cascades-data/{exp_name}-metrics.json", "w") as f:
        json.dump(stats_dict, f)

    # echo the same numbers to stdout
    print(f"rp@k: {rp_at_k:.5f}")
    print(f"failed: {failed}")
    print(f"Optimization time: {data_record_collection.execution_stats.optimization_time}")
    print(f"Optimization cost: {data_record_collection.execution_stats.optimization_cost}")
    print(f"Plan Exec. time: {data_record_collection.execution_stats.plan_execution_time}")
    print(f"Plan Exec. cost: {data_record_collection.execution_stats.plan_execution_cost}")
    print(f"Total Execution time: {data_record_collection.execution_stats.total_execution_time}")
    print(f"Total Execution Cost: {data_record_collection.execution_stats.total_execution_cost}")


================================================
FILE: abacus-research/biodex-priors-cascades.json
================================================
{"0005c18b69": {"quality": 0.19444444444444442, "cost": 0.0038703809999999996, "time": 61.16110005378724}, "009df798a3": {"quality": 0.09252136752136753, "cost": 0.003526569, "time": 74.32940173149109}, "00c93aec22": {"quality": 0.10641025641025642, "cost": 0.010484405999999998, "time": 72.25670802593231}, "00e1fecc4c": {"quality": 0.21752136752136753, "cost": 0.006413474000000001, "time": 63.18833725452423}, "00f4acd0d3": {"quality": 0.0, "cost": 0.005918445, "time": 48.42981550693512}, "01413aa72d": {"quality": 0.16538461538461538, "cost": 0.008015024, "time": 69.29275906085968}, "01c2f973ad": {"quality": 0.1626068376068376, "cost": 0.002413815, "time": 42.94722969532013}, "02078988c1": {"quality": 0.0, "cost": 0.006904731999999999, "time": 27.019422554969786}, "021604dec1": {"quality": 0.3175213675213675, "cost": 0.008399656999999998, "time": 67.33289885520935}, "02410c662e": {"quality": 0.21752136752136753, "cost": 0.006360101999999999, "time": 77.91296794414521}, "0262668df7": {"quality": 0.21752136752136753, "cost": 0.0075904909999999996, "time": 59.82583842277527}, "0267c97b70": {"quality": 0.0, "cost": 0.003249014999999999, "time": 50.19215798377991}, "02ae38e4aa": {"quality": 0.1876068376068376, "cost": 0.004417125, "time": 69.9335319519043}, "02f49fe0fd": {"quality": 0.14807692307692308, "cost": 0.00044366999999999996, "time": 35.21025230884552}, "030756558c": {"quality": 0.04444444444444444, "cost": 0.002745915, "time": 53.435633063316345}, "033ca325e6": {"quality": 0.20277777777777778, "cost": 0.002408562, "time": 27.521768379211426}, "038a5f0a62": {"quality": 0.1876068376068376, "cost": 0.006526305, "time": 52.622441697120664}, "041b5af43d": {"quality": 0.21752136752136753, "cost": 0.010993874, "time": 64.76753988265992}, "042d933706": {"quality": 0.19444444444444442, "cost": 0.008023619, "time": 57.533840489387515}, "04397effa0": {"quality": 0.2064102564102564, "cost": 0.007009628999999999, "time": 76.10882368087769}, "0539e0b42d": {"quality": 
0.25085470085470085, "cost": 0.007662647000000001, "time": 80.42491667270662}, "0554568b86": {"quality": 0.14038461538461539, "cost": 0.008002722, "time": 58.663866949081424}, "06493715cc": {"quality": 0.02222222222222222, "cost": 0.005358681, "time": 78.51178731918336}, "067ee6e91b": {"quality": 0.2098290598290598, "cost": 0.0072115949999999995, "time": 56.89639482498169}, "068b66f00d": {"quality": 0.1, "cost": 0.006528483999999999, "time": 68.53199887275696}, "0695f9b5fc": {"quality": 0.06944444444444445, "cost": 0.007583937, "time": 73.26675381660462}, "073ed5b301": {"quality": 0.07371794871794872, "cost": 0.005419403999999999, "time": 60.23991825580597}, "079feb14a8": {"quality": 0.2098290598290598, "cost": 0.008877133, "time": 62.487732887268066}, "07a3a7daf7": {"quality": 0.0, "cost": 0.006907794, "time": 43.434891891479495}, "08127cd6dd": {"quality": 0.1987179487179487, "cost": 0.012973673000000002, "time": 55.40713529586792}, "0833133620": {"quality": 0.1987179487179487, "cost": 0.005889005999999999, "time": 65.5436268568039}, "089565077c": {"quality": 0.025, "cost": 0.000339832, "time": 65.31151757240295}, "08bf8cc191": {"quality": 0.10555555555555557, "cost": 0.011634817000000002, "time": 95.24387204647064}, "08e1802287": {"quality": 0.23974358974358972, "cost": 0.0033798820000000003, "time": 25.597942876815797}, "090cd3ef31": {"quality": 0.23974358974358972, "cost": 0.0031741180000000005, "time": 25.463910150527955}, "0944a921e8": {"quality": 0.0702991452991453, "cost": 0.00792617, "time": 85.74197387695312}, "0947216ece": {"quality": 0.1987179487179487, "cost": 0.015770336, "time": 82.8155886888504}, "096d51f670": {"quality": 0.18482905982905984, "cost": 0.009268239, "time": 104.61172659397124}, "09791c731b": {"quality": 0.0438034188034188, "cost": 0.006413886, "time": 85.14133274555206}, "0990c0d4f8": {"quality": 0.2098290598290598, "cost": 0.007531591, "time": 77.474085521698}, "0a128688c1": {"quality": 0.18333333333333332, "cost": 
0.014743780000000001, "time": 100.52884330749512}, "0a4c1bbb4a": {"quality": 0.16538461538461538, "cost": 0.005475987, "time": 77.0306186914444}, "0ac969dde3": {"quality": 0.19444444444444442, "cost": 0.009684573, "time": 102.05468263626099}, "0af1efab0e": {"quality": 0.0, "cost": 0.0035705999999999993, "time": 76.96206550598144}, "0b1ed7ff58": {"quality": 0.19252136752136753, "cost": 0.008049169, "time": 92.94120838642121}, "0b3dc2e896": {"quality": 0.0, "cost": 0.00892118, "time": 96.49321339130401}, "0b43e94f3f": {"quality": 0.1987179487179487, "cost": 0.001608864, "time": 40.973159313201904}, "0b4ab72197": {"quality": 0.061111111111111116, "cost": 0.010094982, "time": 72.49727232456206}, "0be862a0dc": {"quality": 0.04038461538461539, "cost": 0.0062549730000000005, "time": 56.527322101593015}, "0bf3129ae8": {"quality": 0.16538461538461538, "cost": 0.0028206660000000003, "time": 54.240378093719485}, "0c020b86a3": {"quality": 0.18482905982905984, "cost": 0.008657178000000001, "time": 49.74211900234222}, "0c6c7fe96a": {"quality": 0.12863247863247865, "cost": 0.012195626999999999, "time": 80.83456852436066}, "0c81c8996a": {"quality": 0.023076923076923078, "cost": 0.006098082000000001, "time": 71.85198886394501}, "0cdc5954dd": {"quality": 0.11666666666666667, "cost": 0.008310576, "time": 66.0079866170883}, "0d25188bf7": {"quality": 0.04807692307692308, "cost": 0.008176553999999999, "time": 84.21552875041962}, "0d9d767ae5": {"quality": 0.04444444444444444, "cost": 0.012340264, "time": 71.0254203081131}, "0e36342fe7": {"quality": 0.09252136752136753, "cost": 0.00467736, "time": 38.08519749641418}, "0e7e862290": {"quality": 0.0, "cost": 0.003672576, "time": 74.36907055377961}, "0e91cd07f9": {"quality": 0.18333333333333332, "cost": 0.006350159999999999, "time": 76.38928816318511}, "0ec672e7c8": {"quality": 0.2098290598290598, "cost": 0.013312296999999999, "time": 90.61400089263915}, "0ed243f788": {"quality": 0.23205128205128206, "cost": 0.008812347, "time": 
80.54674921035766}, "0eeb372802": {"quality": 0.23974358974358972, "cost": 0.013314724, "time": 94.53299105167389}, "0effe9b1dc": {"quality": 0.15918803418803418, "cost": 0.003532296, "time": 72.70150463581085}, "0f7faf684d": {"quality": 0.10982905982905983, "cost": 0.011855149000000002, "time": 89.06640274524689}, "0fcec544e3": {"quality": 0.25982905982905985, "cost": 0.004687262999999999, "time": 83.44144034385681}, "0ff126ebf8": {"quality": 0.21752136752136753, "cost": 0.011823445, "time": 64.27427606582641}, "112d9a3421": {"quality": 0.2730769230769231, "cost": 0.010725950000000001, "time": 87.46276273727418}, "114a097c53": {"quality": 0.10982905982905983, "cost": 0.00542718, "time": 47.13481698036194}, "116334cd72": {"quality": 0.3175213675213675, "cost": 0.012438881999999998, "time": 93.07386040687561}, "1175ee37e6": {"quality": 0.19444444444444442, "cost": 0.001459101, "time": 30.085742378234862}, "11a66478dc": {"quality": 0.125, "cost": 0.00746376, "time": 63.90720744132996}, "11bc996d48": {"quality": 0.2098290598290598, "cost": 0.0065042699999999995, "time": 65.52873225212097}, "11debf9fc0": {"quality": 0.18482905982905984, "cost": 0.006505245, "time": 89.37637939453126}, "123fb650fb": {"quality": 0.21752136752136753, "cost": 0.008327412, "time": 88.56314113140107}, "1274c21076": {"quality": 0.125, "cost": 0.011184342, "time": 102.6374900341034}, "127af50739": {"quality": 0.16538461538461538, "cost": 0.008449134, "time": 104.71919329166413}, "12addbf5e2": {"quality": 0.04444444444444444, "cost": 0.003276834, "time": 88.02170798778533}, "133ee5023f": {"quality": 0.1987179487179487, "cost": 0.005588716, "time": 53.45017175674438}, "1368e1c78e": {"quality": 0.16752136752136754, "cost": 0.0034759379999999996, "time": 84.43210818767548}, "13a009fe0c": {"quality": 0.21752136752136753, "cost": 0.012945674, "time": 98.5892866373062}, "13da306f84": {"quality": 0.10363247863247865, "cost": 0.0087151, "time": 77.5059399843216}, "13f2b9c25b": {"quality": 
0.25982905982905985, "cost": 0.003494103, "time": 98.82596600055695}, "13f75f9bd0": {"quality": 0.2064102564102564, "cost": 0.007151064, "time": 70.01372668743133}, "1404e0aa35": {"quality": 0.2098290598290598, "cost": 0.007724231, "time": 94.99440484046936}, "140ededb41": {"quality": 0.2098290598290598, "cost": 0.001514445, "time": 19.843628692626954}, "142f3a7c70": {"quality": 0.0, "cost": 0.004404129, "time": 77.21951529979705}, "1468dddecc": {"quality": 0.16944444444444445, "cost": 0.002386455, "time": 48.906515979766844}, "14d19a01e2": {"quality": 0.05641025641025641, "cost": 0.0063798959999999995, "time": 72.75729236602783}, "1624bb5302": {"quality": 0.18482905982905984, "cost": 0.001919664, "time": 66.44049925804138}, "1636e0833b": {"quality": 0.025, "cost": 0.002752764, "time": 74.27101521492004}, "1658296f3a": {"quality": 0.21752136752136753, "cost": 0.005375874, "time": 64.17133255004883}, "16f351273f": {"quality": 0.17307692307692307, "cost": 0.01028627, "time": 67.0687807559967}, "171e6ae293": {"quality": 0.21752136752136753, "cost": 0.007912404000000001, "time": 62.13353226184845}, "176da24f53": {"quality": 0.015384615384615385, "cost": 0.0038477320000000004, "time": 42.74407241344451}, "179379555f": {"quality": 0.1987179487179487, "cost": 0.006129615, "time": 88.43889818191528}, "17c928174f": {"quality": 0.1987179487179487, "cost": 0.0028421130000000003, "time": 75.59559867382049}, "181c91d1be": {"quality": 0.21752136752136753, "cost": 0.012095439999999999, "time": 67.77491972446441}, "18368684cd": {"quality": 0.05641025641025641, "cost": 0.0059048880000000005, "time": 90.52595224380494}, "183743e76e": {"quality": 0.0, "cost": 0.005899368, "time": 74.72685542106629}, "18f55750b0": {"quality": 0.0, "cost": 0.008405136, "time": 124.15524208545685}, "19563b057d": {"quality": 0.1987179487179487, "cost": 0.011058744, "time": 94.49535017013551}, "1957127275": {"quality": 0.29252136752136754, "cost": 0.008193474000000001, "time": 92.91692547798156}, 
"197bb53f10": {"quality": 0.2098290598290598, "cost": 0.006091916999999999, "time": 79.83643505573272}, "199fd1fbf2": {"quality": 0.058333333333333334, "cost": 0.00121965, "time": 48.699701046943666}, "19b40e0271": {"quality": 0.3175213675213675, "cost": 0.008674995000000001, "time": 116.94037146568297}, "19e3db7fe7": {"quality": 0.025, "cost": 0.01204651, "time": 117.68800120353698}, "1ad856985f": {"quality": 0.19444444444444442, "cost": 0.008808637000000001, "time": 115.93177556991577}, "1adec2dca2": {"quality": 0.0, "cost": 0.013760218000000001, "time": 125.3757052898407}, "1b04a2b184": {"quality": 0.09252136752136753, "cost": 0.013170014, "time": 111.55746636390685}, "1b28439bd7": {"quality": 0.21752136752136753, "cost": 0.010848586, "time": 84.68317291736602}, "1beb2fac62": {"quality": 0.3175213675213675, "cost": 0.013538090000000001, "time": 73.24731080532074}, "1c347e4d91": {"quality": 0.11752136752136753, "cost": 0.003958569, "time": 81.87744581699371}, "1c3882926e": {"quality": 0.06538461538461539, "cost": 0.006650534999999999, "time": 102.02758514881134}, "1cc6d9efb6": {"quality": 0.21752136752136753, "cost": 0.011642267999999999, "time": 73.37450892925263}, "1ce3d77039": {"quality": 0.1987179487179487, "cost": 0.000591721, "time": 33.82067358493805}, "1ce99cf2c8": {"quality": 0.19252136752136753, "cost": 0.007676729, "time": 109.94239032268524}, "1d26090364": {"quality": 0.08141025641025641, "cost": 0.004802969999999999, "time": 37.3655684709549}, "1d87f97e62": {"quality": 0.25085470085470085, "cost": 0.0074821539999999995, "time": 69.90373904705048}, "1da2369719": {"quality": 0.1737179487179487, "cost": 0.003804339, "time": 75.42430078983307}, "1e18e60895": {"quality": 0.1952991452991453, "cost": 0.000637105, "time": 36.58538358211517}, "1e1bf7e88b": {"quality": 0.2064102564102564, "cost": 0.012300497, "time": 91.10763094425201}, "1e8b3521f8": {"quality": 0.23205128205128206, "cost": 0.003055887, "time": 85.00319654941559}, "1f5e8c9e9a": {"quality": 
0.06538461538461539, "cost": 0.01254384, "time": 110.50039112567902}, "2018bef45f": {"quality": 0.13974358974358975, "cost": 0.0073448359999999996, "time": 90.09606876373292}, "2066966577": {"quality": 0.19444444444444442, "cost": 0.011873376, "time": 119.55739791393279}, "2075ff1d04": {"quality": 0.08333333333333334, "cost": 0.0009685659999999999, "time": 70.29870116710663}, "2080b60a57": {"quality": 0.04444444444444444, "cost": 0.005072787, "time": 116.32027242183685}, "208a98f514": {"quality": 0.21752136752136753, "cost": 0.009998995, "time": 118.32991974353791}, "20e10af7d4": {"quality": 0.2098290598290598, "cost": 0.005652002999999999, "time": 116.42957108020784}, "20e2c0b057": {"quality": 0.10982905982905983, "cost": 0.007542789, "time": 113.53230805397033}, "211b89b4cd": {"quality": 0.1952991452991453, "cost": 0.005615415, "time": 63.006171917915346}, "2153174e2d": {"quality": 0.11474358974358972, "cost": 0.007281448, "time": 118.94957902431489}, "21b2b8ebd1": {"quality": 0.21752136752136753, "cost": 0.007063268000000001, "time": 76.91416993141175}, "21b2df8512": {"quality": 0.19444444444444442, "cost": 0.00326791, "time": 69.47645666599274}, "21bed16a7d": {"quality": 0.07307692307692308, "cost": 0.0016491099999999999, "time": 30.10742793083191}, "227246dff8": {"quality": 0.1876068376068376, "cost": 0.0006464099999999999, "time": 42.72886557579041}, "227c30d349": {"quality": 0.1, "cost": 0.005918874, "time": 103.58872454166413}, "228687831a": {"quality": 0.04722222222222222, "cost": 0.005076029999999999, "time": 67.8051172733307}, "23075b2a6e": {"quality": 0.05, "cost": 0.0024716549999999997, "time": 97.33722229003905}, "23566f15ab": {"quality": 0.1876068376068376, "cost": 0.00044987499999999997, "time": 40.82834756374359}, "2370cebb10": {"quality": 0.21752136752136753, "cost": 0.004052682, "time": 106.21984114646912}, "2386b03c4c": {"quality": 0.1987179487179487, "cost": 0.006683786999999999, "time": 110.57667918205262}, "24957f3a43": {"quality": 
0.1987179487179487, "cost": 0.004216121999999999, "time": 70.75154151916504}, "24c122de4e": {"quality": 0.061111111111111116, "cost": 0.0059287260000000005, "time": 78.56412732601166}, "24f76747b9": {"quality": 0.14871794871794872, "cost": 0.003334974, "time": 96.20667114257813}, "2609bfd616": {"quality": 0.08482905982905983, "cost": 0.009335312, "time": 101.31035015583038}, "260ab3e966": {"quality": 0.18482905982905984, "cost": 0.006512652000000001, "time": 107.31265118122101}, "2728c8eb6a": {"quality": 0.05918803418803419, "cost": 0.009020646, "time": 82.33452692031861}, "27971eaaf5": {"quality": 0.1876068376068376, "cost": 0.005893562, "time": 85.50352709293365}, "27ba0964b2": {"quality": 0.18205128205128207, "cost": 0.0012502439999999997, "time": 50.85352149009705}, "27daa50458": {"quality": 0.21752136752136753, "cost": 0.005911454, "time": 49.84150557518005}, "28369b2421": {"quality": 0.3175213675213675, "cost": 0.008581950000000001, "time": 82.63782026767731}, "28421e6d62": {"quality": 0.05, "cost": 0.0071309500000000005, "time": 77.62054443359375}, "2848c42f91": {"quality": 0.0, "cost": 0.004609089, "time": 92.82525777816772}, "28a638bb6e": {"quality": 0.04444444444444444, "cost": 0.0037765230000000004, "time": 103.16399810314178}, "290947fe5a": {"quality": 0.08482905982905983, "cost": 0.005273313, "time": 115.58293502330781}, "2936c3e43e": {"quality": 0.13205128205128205, "cost": 0.011067623, "time": 92.91213045120239}, "293ec5edca": {"quality": 0.2987179487179487, "cost": 0.006866393, "time": 77.98297226428986}, "294e541235": {"quality": 0.0, "cost": 0.0024907139999999998, "time": 55.42697856426239}, "295ed5e759": {"quality": 0.023076923076923078, "cost": 0.007770515999999999, "time": 73.042240858078}, "2960431101": {"quality": 0.21752136752136753, "cost": 0.007103678, "time": 72.23796293735504}, "29892d8468": {"quality": 0.0, "cost": 0.011991246, "time": 90.30782704353332}, "299a0aeb65": {"quality": 0.21752136752136753, "cost": 0.008716849999999998, 
"time": 87.09911065101625}, "29bf3c0a3b": {"quality": 0.0952991452991453, "cost": 0.008317653000000001, "time": 95.62355568408967}, "29c8c693e2": {"quality": 0.19252136752136753, "cost": 0.009049301000000001, "time": 97.66356644630432}, "2a7d15f4a7": {"quality": 0.1987179487179487, "cost": 0.0007805049999999999, "time": 26.81472017765045}, "2ae24e0124": {"quality": 0.2098290598290598, "cost": 0.006997367999999999, "time": 100.80345997810363}, "2b5679d248": {"quality": 0.23482905982905983, "cost": 0.012146391000000001, "time": 73.05133624076844}, "2b5ab72a55": {"quality": 0.1, "cost": 0.00048774399999999997, "time": 87.95751609802247}, "2b82a67eb1": {"quality": 0.2098290598290598, "cost": 0.002022681, "time": 73.1611754655838}, "2bcbffdf85": {"quality": 0.1, "cost": 0.003530234999999999, "time": 89.18944058418273}, "2bd39ee744": {"quality": 0.2098290598290598, "cost": 0.014582621000000002, "time": 59.03191387653351}, "2bf38d797f": {"quality": 0.2064102564102564, "cost": 0.0075413270000000004, "time": 54.856262516975406}, "2c1640adf7": {"quality": 0.18482905982905984, "cost": 0.0019162139999999999, "time": 77.58103239536285}, "2c5cf9eb26": {"quality": 0.125, "cost": 0.006593481, "time": 71.76690304279327}, "2c87313a93": {"quality": 0.11752136752136753, "cost": 0.012109324000000001, "time": 136.4559998512268}, "2c9a9f94c4": {"quality": 0.06538461538461539, "cost": 0.011352702, "time": 108.1334749698639}, "2d3bbc2d23": {"quality": 0.1987179487179487, "cost": 0.006806840999999999, "time": 94.72236230373383}, "2d7f1dbd4b": {"quality": 0.1987179487179487, "cost": 0.003597009, "time": 112.8651005268097}, "2de113167b": {"quality": 0.11752136752136753, "cost": 0.011756008, "time": 101.33509397506714}, "2de3eb2c19": {"quality": 0.23974358974358972, "cost": 0.006698560000000001, "time": 58.83604803085328}, "2e02b71061": {"quality": 0.09444444444444444, "cost": 0.002093574, "time": 88.67284088134765}, "2e30394ac6": {"quality": 0.07307692307692308, "cost": 0.007492572, "time": 
90.685981965065}, "2e5d071f21": {"quality": 0.12863247863247865, "cost": 0.0072971500000000005, "time": 123.94845788478851}, "2e9c5cc9bf": {"quality": 0.05, "cost": 0.005878296000000001, "time": 49.976054430007935}, "2ec4bec1a3": {"quality": 0.21474358974358973, "cost": 0.010779144000000001, "time": 124.55905365943909}, "2f1573da80": {"quality": 0.1987179487179487, "cost": 0.008802546, "time": 103.0501886844635}, "2fc0cb3592": {"quality": 0.06538461538461539, "cost": 0.00690309, "time": 73.22720482349396}, "2fd9cd426a": {"quality": 0.21752136752136753, "cost": 0.01168472, "time": 97.46322917938232}, "3019af79b3": {"quality": 0.1, "cost": 0.001074796, "time": 51.414027357101446}, "302c1d97fc": {"quality": 0.1814102564102564, "cost": 0.01244514, "time": 70.44251236915588}, "303b467574": {"quality": 0.21752136752136753, "cost": 0.007695415999999999, "time": 104.79880075454712}, "3058b1f1f8": {"quality": 0.12863247863247865, "cost": 0.006238776, "time": 104.19706840515136}, "30ae4cbe91": {"quality": 0.19252136752136753, "cost": 0.000758614, "time": 64.8543310880661}, "30e3ff1d17": {"quality": 0.1841880341880342, "cost": 0.007638456, "time": 97.64294464588164}, "30f20c8fe6": {"quality": 0.1876068376068376, "cost": 0.006185881000000001, "time": 95.83135304450988}, "316759d191": {"quality": 0.06538461538461539, "cost": 0.01277288, "time": 106.51430850028991}, "3177802176": {"quality": 0.05, "cost": 0.000812176, "time": 68.97149279117585}, "3184f977a8": {"quality": 0.06944444444444445, "cost": 0.003731166, "time": 116.78110229969025}, "3194e440cf": {"quality": 0.1, "cost": 0.0021085889999999997, "time": 68.94484202861786}, "3197ad4faf": {"quality": 0.125, "cost": 0.0029357279999999994, "time": 104.7419487476349}, "31a423a3bf": {"quality": 0.0, "cost": 0.002416362, "time": 71.78247156143189}, "321e17afbd": {"quality": 0.1952991452991453, "cost": 0.009030642000000002, "time": 102.16506278514862}, "32b101d807": {"quality": 0.21752136752136753, "cost": 0.013004699000000002, 
"time": 107.63277230262756}, "332a350ea2": {"quality": 0.10363247863247865, "cost": 0.005886114, "time": 79.25630362033844}, "33459cd29c": {"quality": 0.02222222222222222, "cost": 0.009792765999999998, "time": 109.5698808670044}, "33a187e74f": {"quality": 0.1987179487179487, "cost": 0.0057371969999999994, "time": 108.5205159664154}, "34026bb5cc": {"quality": 0.2098290598290598, "cost": 0.003850629, "time": 79.99503300189971}, "3511b5e1d0": {"quality": 0.0, "cost": 0.007917534, "time": 106.0169960975647}, "3513e54767": {"quality": 0.12307692307692308, "cost": 0.00652464, "time": 56.468523144721985}, "353f0cb1ac": {"quality": 0.2064102564102564, "cost": 0.0027354140000000002, "time": 50.53772373199463}, "3550bf88cb": {"quality": 0.22863247863247865, "cost": 0.007747509, "time": 104.70380001068116}, "35610fb420": {"quality": 0.2098290598290598, "cost": 0.004438119, "time": 84.96952936649322}, "357267e14b": {"quality": 0.17222222222222222, "cost": 0.00107605, "time": 39.50561866760254}, "36011c7606": {"quality": 0.1987179487179487, "cost": 0.008097354, "time": 84.37511310577392}, "362d480d6d": {"quality": 0.22863247863247865, "cost": 0.010916724, "time": 110.61522128582001}, "363209b6e7": {"quality": 0.32863247863247863, "cost": 0.006868834000000001, "time": 83.50905873775483}, "3637084f91": {"quality": 0.3175213675213675, "cost": 0.007119692, "time": 82.10715711116791}, "36b17c40f3": {"quality": 0.03418803418803419, "cost": 0.004674881999999999, "time": 53.087517738342285}, "36c66671ee": {"quality": 0.12307692307692308, "cost": 0.006580928999999999, "time": 86.52281455993652}, "372e8b5f4f": {"quality": 0.058333333333333334, "cost": 0.001610232, "time": 70.54334411621093}, "375ed248fe": {"quality": 0.06752136752136753, "cost": 0.008873826000000001, "time": 86.2198546409607}, "377b8b0bcc": {"quality": 0.025, "cost": 0.0049981109999999995, "time": 106.38127601146698}, "37bd28f2c9": {"quality": 0.21752136752136753, "cost": 0.007961847, "time": 103.88883001804352}, 
"38075bb01f": {"quality": 0.21666666666666667, "cost": 0.003875973, "time": 75.78762099742889}, "38567d6a43": {"quality": 0.0, "cost": 0.004616319, "time": 89.08885374069214}, "389a99ab21": {"quality": 0.07371794871794872, "cost": 0.007963053, "time": 102.66287496089936}, "389c54cbca": {"quality": 0.0702991452991453, "cost": 0.005610244, "time": 48.15491397380829}, "38ec11cf7b": {"quality": 0.0, "cost": 0.007187318999999999, "time": 103.98532650470733}, "3980f20caa": {"quality": 0.04807692307692308, "cost": 0.014391898, "time": 100.08462851047516}, "39cd4ca402": {"quality": 0.025, "cost": 0.003120591, "time": 75.18385965824127}, "3a34b24c41": {"quality": 0.21752136752136753, "cost": 0.002965395, "time": 73.19122822284699}, "3b2e8075ea": {"quality": 0.2098290598290598, "cost": 0.003075393, "time": 70.68897068500519}, "3b3676521a": {"quality": 0.21752136752136753, "cost": 0.013715839000000002, "time": 107.15725564956665}, "3b3a6bf087": {"quality": 0.30982905982905984, "cost": 0.006470724000000001, "time": 74.07467935085296}, "3b4bde0121": {"quality": 0.0, "cost": 0.00302925, "time": 77.53029568195343}, "3b57530a56": {"quality": 0.2098290598290598, "cost": 0.002663507, "time": 48.332898473739625}, "3c206c89f3": {"quality": 0.21752136752136753, "cost": 0.008676515999999999, "time": 115.085169839859}, "3cbab8082e": {"quality": 0.09871794871794873, "cost": 0.008985135, "time": 116.89071977138519}, "3d71c4dd2c": {"quality": 0.10982905982905983, "cost": 0.006464912999999999, "time": 86.58996284008026}, "3ea15ac20c": {"quality": 0.11538461538461539, "cost": 0.007241556, "time": 125.68785231113434}, "3f1a58aec9": {"quality": 0.22863247863247865, "cost": 0.00189393, "time": 45.61551144123077}, "3f2321bb08": {"quality": 0.125, "cost": 0.000410382, "time": 33.271996068954465}, "3f3ef494b0": {"quality": 0.0, "cost": 0.001176912, "time": 50.03896124362946}, "3f62c3fbfc": {"quality": 0.21752136752136753, "cost": 0.004284207, "time": 95.22124736309053}, "3f88dd99f7": {"quality": 
0.19252136752136753, "cost": 0.005681200000000001, "time": 63.5021466255188}, "3fa747af9a": {"quality": 0.15833333333333333, "cost": 0.002231574, "time": 96.99003422260284}, "40104c813f": {"quality": 0.0, "cost": 0.010208657000000001, "time": 125.99128649234771}, "403b05da2d": {"quality": 0.08141025641025641, "cost": 0.010538071999999999, "time": 94.6719701051712}, "4043815a3e": {"quality": 0.21752136752136753, "cost": 0.013145456, "time": 124.91901173591614}, "409ff67607": {"quality": 0.06944444444444445, "cost": 0.004053711, "time": 99.38009803295135}, "412c065b83": {"quality": 0.0, "cost": 0.008236578, "time": 92.44097802639007}, "4171fbac5c": {"quality": 0.19444444444444442, "cost": 0.009067686, "time": 114.00634713172913}, "4191118787": {"quality": 0.1987179487179487, "cost": 0.008734028999999999, "time": 109.63090167045593}, "41d5b97871": {"quality": 0.0, "cost": 0.002514906, "time": 62.46952087879181}, "41d8845655": {"quality": 0.2098290598290598, "cost": 0.010189181000000002, "time": 59.657727408409116}, "41ee202cac": {"quality": 0.06538461538461539, "cost": 0.000921442, "time": 54.925141382217404}, "42082dcd0d": {"quality": 0.1987179487179487, "cost": 0.007583567999999999, "time": 108.27059605121613}, "42ddd48341": {"quality": 0.22094017094017093, "cost": 0.014396616, "time": 104.56641755104064}, "4361bc7ea7": {"quality": 0.13333333333333333, "cost": 0.001473496, "time": 63.439108324050906}, "43afdad250": {"quality": 0.22863247863247865, "cost": 0.011869643, "time": 108.28341348171234}, "43c3cf9cb8": {"quality": 0.2064102564102564, "cost": 0.0068331120000000006, "time": 53.15135598182678}, "43d24fb32a": {"quality": 0.07307692307692308, "cost": 0.00976587, "time": 85.51874697208405}, "440bc872de": {"quality": 0.07222222222222222, "cost": 0.007188219, "time": 121.43468098640442}, "44173a9aef": {"quality": 0.30641025641025643, "cost": 0.01244934, "time": 115.21545617580415}, "44d6af5523": {"quality": 0.2098290598290598, "cost": 0.00476793, "time": 
81.34744908809662}, "44fe4e4e3e": {"quality": 0.11752136752136753, "cost": 0.01136529, "time": 81.14654626846314}, "4587a1500c": {"quality": 0.3175213675213675, "cost": 0.006090974000000002, "time": 79.77526035308838}, "45ef93b61e": {"quality": 0.1987179487179487, "cost": 0.003150051, "time": 81.59605078697206}, "461846a52d": {"quality": 0.1814102564102564, "cost": 0.003777498, "time": 82.82821555137635}, "462e6ff849": {"quality": 0.11752136752136753, "cost": 0.010684958, "time": 113.33498125076294}, "4630853d32": {"quality": 0.06752136752136753, "cost": 0.008278823000000001, "time": 86.25940473079682}, "46475b9e75": {"quality": 0.2098290598290598, "cost": 0.0061740449999999995, "time": 87.75788187980652}, "46654a1f32": {"quality": 0.0, "cost": 0.011978384, "time": 67.87502360343933}, "466a3036b2": {"quality": 0.09252136752136753, "cost": 0.010652967, "time": 114.7115253686905}, "466d4d16dd": {"quality": 0.18482905982905984, "cost": 0.008396510000000001, "time": 86.67418849468231}, "46a35022d8": {"quality": 0.1952991452991453, "cost": 0.000766365, "time": 59.83990566730499}, "46ed68152d": {"quality": 0.18333333333333332, "cost": 0.01143953, "time": 92.87648718357087}, "46edc488a4": {"quality": 0.059829059829059825, "cost": 0.0012849899999999997, "time": 50.55697724819183}, "476a12876c": {"quality": 0.11752136752136753, "cost": 0.006154817999999999, "time": 81.70997877120972}, "48043e2304": {"quality": 0.2098290598290598, "cost": 0.009217511000000001, "time": 89.0722332715988}, "488645cbd9": {"quality": 0.2064102564102564, "cost": 0.001493856, "time": 48.69374096393585}, "49009a3b57": {"quality": 0.059829059829059825, "cost": 0.007598573999999999, "time": 78.47919590473174}, "4909061216": {"quality": 0.21752136752136753, "cost": 0.013011773, "time": 104.75487668514252}, "49107972df": {"quality": 0.025, "cost": 0.007639823999999999, "time": 118.11295173168182}, "49731b1ccd": {"quality": 0.0952991452991453, "cost": 0.006953804999999999, "time": 77.60816009044646}, 
"498f146004": {"quality": 0.11752136752136753, "cost": 0.008050870000000002, "time": 112.59319038391114}, "49ad844bd2": {"quality": 0.2098290598290598, "cost": 0.008512473, "time": 109.38159234523773}, "49ca727e49": {"quality": 0.2098290598290598, "cost": 0.000746866, "time": 34.221199584007266}, "4a4a960a82": {"quality": 0.13333333333333333, "cost": 0.007445242000000001, "time": 84.403648686409}, "4a92372986": {"quality": 0.16538461538461538, "cost": 0.00319335, "time": 83.45380291938781}, "4aa7e8fde6": {"quality": 0.10641025641025642, "cost": 0.0019865879999999996, "time": 83.08999395370483}, "4aafd39d76": {"quality": 0.14038461538461539, "cost": 0.005373513, "time": 82.9023279428482}, "4ace1cfad1": {"quality": 0.16538461538461538, "cost": 0.009508553999999999, "time": 106.34385075569153}, "4ad1952206": {"quality": 0.20213675213675214, "cost": 0.0085493, "time": 83.97655324935913}, "4b59f40131": {"quality": 0.04807692307692308, "cost": 0.006207440999999999, "time": 83.43397681713105}, "4b86a1c038": {"quality": 0.2064102564102564, "cost": 0.00792555, "time": 106.1481963634491}, "4b92a26754": {"quality": 0.0, "cost": 0.00047688399999999996, "time": 69.90886387825012}, "4bc4528402": {"quality": 0.16538461538461538, "cost": 0.004527411, "time": 81.34518160820008}, "4c158a1a4a": {"quality": 0.08205128205128205, "cost": 0.007535195999999999, "time": 78.38852503299714}, "4c954323e3": {"quality": 0.21752136752136753, "cost": 0.00974773, "time": 103.17564594745636}, "4d8bcf8ae2": {"quality": 0.125, "cost": 0.000539128, "time": 69.75875072479249}, "4d91e8a27b": {"quality": 0.21752136752136753, "cost": 0.010584353000000001, "time": 76.16667878627777}, "4dd3635bc3": {"quality": 0.2098290598290598, "cost": 0.012259316000000001, "time": 78.35238783359529}, "4dd96bd18f": {"quality": 0.21752136752136753, "cost": 0.011986314000000001, "time": 105.92028946876526}, "4e298ee0d4": {"quality": 0.1814102564102564, "cost": 0.008259033, "time": 95.37437601089476}, "4e3443a0f9": 
{"quality": 0.11752136752136753, "cost": 0.013054975, "time": 106.09084448814392}, "4e4b9db2b8": {"quality": 0.21752136752136753, "cost": 0.007140224000000001, "time": 72.97610201835633}, "4e6509f614": {"quality": 0.061111111111111116, "cost": 0.0053766, "time": 47.53030514717102}, "4e6a83e751": {"quality": 0.07371794871794872, "cost": 0.001089142, "time": 53.57680480480194}, "4e8d8e527a": {"quality": 0.14871794871794872, "cost": 0.0051247079999999995, "time": 85.9967652797699}, "4ef333ab21": {"quality": 0.19444444444444442, "cost": 0.001917522, "time": 79.47004499435425}, "4f16545711": {"quality": 0.06666666666666667, "cost": 0.005466131999999999, "time": 88.67296268939972}, "4f8cca1195": {"quality": 0.21752136752136753, "cost": 0.010168095, "time": 114.15254590511321}, "500860eaa2": {"quality": 0.0, "cost": 0.0022933619999999997, "time": 38.243334197998045}, "50701b505e": {"quality": 0.1987179487179487, "cost": 0.0064327049999999995, "time": 79.18819501399994}, "50bc87e9cc": {"quality": 0.1987179487179487, "cost": 0.005166006000000001, "time": 86.32211146354675}, "50c03be77c": {"quality": 0.10982905982905983, "cost": 0.00168543, "time": 94.39419853687286}, "510375edad": {"quality": 0.0, "cost": 0.0021525, "time": 67.02195911407472}, "512fdb607c": {"quality": 0.2098290598290598, "cost": 0.008402651, "time": 120.24980294704437}, "51583a901c": {"quality": 0.2098290598290598, "cost": 0.010226283000000001, "time": 109.70203943252564}, "521314dab6": {"quality": 0.19252136752136753, "cost": 0.0064746199999999995, "time": 59.58354845046997}, "5241bf401b": {"quality": 0.07222222222222222, "cost": 0.006321017999999999, "time": 82.37689163684846}, "526878b5eb": {"quality": 0.07371794871794872, "cost": 0.008675337, "time": 115.97295072078705}, "52c1cba6ce": {"quality": 0.21752136752136753, "cost": 0.009671612999999999, "time": 108.82956576347351}, "52e5d0f4fb": {"quality": 0.2064102564102564, "cost": 0.001697778, "time": 88.22295272350311}, "52f041a70e": {"quality": 
0.2098290598290598, "cost": 0.001067476, "time": 32.61202094554901}, "5307496302": {"quality": 0.2098290598290598, "cost": 0.005388657, "time": 86.62635931968688}, "53869388bb": {"quality": 0.061111111111111116, "cost": 0.0021395499999999996, "time": 39.578048658370975}, "53d2932c4f": {"quality": 0.2814102564102564, "cost": 0.007557482000000001, "time": 73.93080537319183}, "5474247f91": {"quality": 0.0, "cost": 0.008248308, "time": 80.75084838867187}, "557d2cf7ba": {"quality": 0.19444444444444442, "cost": 0.009174846, "time": 81.92483322620392}, "559c7120c5": {"quality": 0.14038461538461539, "cost": 0.009353153, "time": 113.30842261314393}, "55c8aa8935": {"quality": 0.04807692307692308, "cost": 0.01165586, "time": 114.18454928398131}, "56a29a28c5": {"quality": 0.04807692307692308, "cost": 0.004978499999999999, "time": 55.467578983306886}, "56b39eb1d6": {"quality": 0.21752136752136753, "cost": 0.009558101, "time": 131.33570635318756}, "5703697dbd": {"quality": 0.2098290598290598, "cost": 0.006108023999999999, "time": 79.37135038375854}, "5718f2ed80": {"quality": 0.10641025641025642, "cost": 0.003117972, "time": 91.63201060295106}, "572a02a59a": {"quality": 0.19444444444444442, "cost": 0.004321886999999999, "time": 100.45879077911377}, "572c2df793": {"quality": 0.04038461538461539, "cost": 0.004394319000000001, "time": 143.467453289032}, "5750713a41": {"quality": 0.2098290598290598, "cost": 0.006046722, "time": 92.31352968215941}, "57757ef15e": {"quality": 0.1876068376068376, "cost": 0.006326642, "time": 106.06430275440215}, "5793d14bbe": {"quality": 0.2098290598290598, "cost": 0.009510144000000002, "time": 139.83697164058685}, "579a915ed2": {"quality": 0.08333333333333334, "cost": 0.011355894000000002, "time": 109.32498943805695}, "579c81bbe0": {"quality": 0.025, "cost": 0.004824098, "time": 75.78087794780731}, "57bed1722f": {"quality": 0.025, "cost": 0.005056998, "time": 85.83169932365418}, "585ba6d20b": {"quality": 0.21752136752136753, "cost": 0.007866175, "time": 
151.51093373298644}, "589a1cea79": {"quality": 0.05, "cost": 0.009551150000000001, "time": 151.7195835828781}, "59006532b4": {"quality": 0.16538461538461538, "cost": 0.007208474999999999, "time": 118.06772444248199}, "59326c4e00": {"quality": 0.015384615384615385, "cost": 0.005300058, "time": 72.70066883563996}, "596f0ed542": {"quality": 0.023076923076923078, "cost": 0.011558136, "time": 137.05188086032868}, "5971ba4e0d": {"quality": 0.21752136752136753, "cost": 0.0018305540000000003, "time": 43.38757050037384}, "59e0117b7d": {"quality": 0.21752136752136753, "cost": 0.006857488, "time": 139.82263526916503}, "59f515d0da": {"quality": 0.2064102564102564, "cost": 0.00388749, "time": 108.1351862192154}, "59f887b67c": {"quality": 0.0, "cost": 0.010208398, "time": 146.34244396686552}, "5a22920db4": {"quality": 0.1987179487179487, "cost": 0.0018006569999999998, "time": 75.78275735378266}, "5a35020d45": {"quality": 0.1987179487179487, "cost": 0.0060887910000000005, "time": 109.82949197292328}, "5aa71bb88a": {"quality": 0.12094017094017094, "cost": 0.000638436, "time": 67.15015056133271}, "5b10fbdbe1": {"quality": 0.21752136752136753, "cost": 0.013089312000000002, "time": 128.6064488887787}, "5b4ad39a9e": {"quality": 0.11752136752136753, "cost": 0.0032681159999999997, "time": 99.70189120769501}, "5bade9eb85": {"quality": 0.18482905982905984, "cost": 0.008712719999999998, "time": 120.3732186794281}, "5be16744bf": {"quality": 0.1952991452991453, "cost": 0.013749790000000001, "time": 91.71922521591186}, "5c0db11303": {"quality": 0.058333333333333334, "cost": 0.002397702, "time": 93.91862483024596}, "5c53feccd9": {"quality": 0.023076923076923078, "cost": 0.015926556, "time": 118.78805196285248}, "5c77c7c2b2": {"quality": 0.15, "cost": 0.008434941000000001, "time": 85.08626408576964}, "5d072194b8": {"quality": 0.11752136752136753, "cost": 0.007648864000000002, "time": 104.28334321975709}, "5d79b50feb": {"quality": 0.14038461538461539, "cost": 0.013589224, "time": 
84.18065688610076}, "5dc216cd6b": {"quality": 0.10982905982905983, "cost": 0.010508788, "time": 83.96197824478149}, "5dd68c1b8f": {"quality": 0.16944444444444445, "cost": 0.001444968, "time": 54.59350550174713}, "5de4a882c1": {"quality": 0.21752136752136753, "cost": 0.008734155, "time": 75.31518497467042}, "5deeeb223f": {"quality": 0.24316239316239316, "cost": 0.005724273, "time": 115.48380098342895}, "5e2f03b962": {"quality": 0.1876068376068376, "cost": 0.0007302779999999999, "time": 48.90210340023041}, "5ea2fab380": {"quality": 0.05149572649572649, "cost": 0.0048052319999999996, "time": 47.63824257850647}, "5eb3bb525b": {"quality": 0.0702991452991453, "cost": 0.008967890000000001, "time": 96.21212074756622}, "5ec3832817": {"quality": 0.0, "cost": 0.008163805, "time": 148.1108601331711}, "5f0199e07b": {"quality": 0.025, "cost": 0.00068568, "time": 49.921340346336365}, "5f37b3902b": {"quality": 0.04038461538461539, "cost": 0.007223498, "time": 77.24008927345275}, "60cb623c53": {"quality": 0.04871794871794872, "cost": 0.002396232, "time": 54.30748798847199}, "612e546d71": {"quality": 0.10705128205128206, "cost": 0.008148234, "time": 151.1273174762726}, "6160bfb439": {"quality": 0.11474358974358972, "cost": 0.010103339999999999, "time": 148.44622106552123}, "6178f33808": {"quality": 0.04722222222222222, "cost": 0.006706226999999999, "time": 153.97144429683686}, "619b48dde9": {"quality": 0.21752136752136753, "cost": 0.006427425999999999, "time": 120.5339899301529}, "6268ac658c": {"quality": 0.24316239316239316, "cost": 0.01069585, "time": 144.7754723072052}, "628f34aace": {"quality": 0.2098290598290598, "cost": 0.005102127, "time": 151.19322457313538}, "630d1ecda0": {"quality": 0.0, "cost": 0.00503412, "time": 158.05562288761138}, "63a0aaebed": {"quality": 0.15833333333333333, "cost": 0.001865547, "time": 88.02101895809173}, "63f392465f": {"quality": 0.18333333333333332, "cost": 0.0020362590000000003, "time": 114.81161072254181}, "6527f214c3": {"quality": 0.125, 
"cost": 0.0020534459999999996, "time": 107.1269181728363}, "652c0f4bdf": {"quality": 0.1, "cost": 0.013503214, "time": 140.5670464038849}, "6533c85913": {"quality": 0.10641025641025642, "cost": 0.009929774999999998, "time": 122.77552180290222}, "65627426e0": {"quality": 0.15, "cost": 0.000700312, "time": 55.69904806613922}, "65801893b4": {"quality": 0.025, "cost": 0.01106166, "time": 113.8573011636734}, "65b76da9c6": {"quality": 0.12222222222222223, "cost": 0.007336156, "time": 88.77893948554993}, "65be1c1306": {"quality": 0.06752136752136753, "cost": 0.001988206, "time": 34.18720245361328}, "65e0216208": {"quality": 0.05, "cost": 0.008013333, "time": 111.02614188194275}, "65eee615d7": {"quality": 0.1987179487179487, "cost": 0.000519715, "time": 32.74071106910706}, "66277da52f": {"quality": 0.09871794871794873, "cost": 0.0036039389999999996, "time": 110.55973320007324}, "66750c0934": {"quality": 0.21752136752136753, "cost": 0.007508133, "time": 85.75274183750153}, "66776ec181": {"quality": 0.10641025641025642, "cost": 0.006989541, "time": 111.77394149303436}, "67632141f6": {"quality": 0.04038461538461539, "cost": 0.004542882, "time": 63.92582683563233}, "677deb302a": {"quality": 0.1814102564102564, "cost": 0.007409720999999999, "time": 111.17820315361024}, "67868fcff6": {"quality": 0.21752136752136753, "cost": 0.011676892000000001, "time": 81.84591567516327}, "67aad9ea16": {"quality": 0.2098290598290598, "cost": 0.007452915, "time": 126.7279718399048}, "67bab6732d": {"quality": 0.10555555555555557, "cost": 0.01121626, "time": 84.01423738002777}, "67fe399cf1": {"quality": 0.21752136752136753, "cost": 0.007087558, "time": 80.51065149307252}, "6846bd8fb3": {"quality": 0.08141025641025641, "cost": 0.006947139, "time": 85.74786493778228}, "68583552fb": {"quality": 0.0, "cost": 0.0053533439999999995, "time": 93.39369978904725}, "689e327daf": {"quality": 0.08333333333333334, "cost": 0.000799428, "time": 57.74340398311615}, "69b3b67de6": {"quality": 0.23974358974358972, 
"cost": 0.008200215, "time": 95.98227195739746}, "69bf3f6ba0": {"quality": 0.21752136752136753, "cost": 0.008827848, "time": 102.32003858089448}, "69f90e610f": {"quality": 0.22585470085470086, "cost": 0.011046368, "time": 121.27799794673919}, "6a022c3f73": {"quality": 0.21944444444444444, "cost": 0.004195854, "time": 99.68456645011902}, "6a10c53ad8": {"quality": 0.32863247863247863, "cost": 0.012164633000000001, "time": 119.7128321170807}, "6a6348f69d": {"quality": 0.025, "cost": 0.0010719339999999999, "time": 79.13316841125489}, "6a8726145c": {"quality": 0.1876068376068376, "cost": 0.0014679629999999996, "time": 77.99199786186219}, "6a8a675442": {"quality": 0.22094017094017093, "cost": 0.005341439999999999, "time": 90.32394058704377}, "6aac59742a": {"quality": 0.0, "cost": 0.009132285, "time": 137.44288988113402}, "6ac193c88f": {"quality": 0.25085470085470085, "cost": 0.006668448, "time": 121.86276133060456}, "6ae9e9de0b": {"quality": 0.21752136752136753, "cost": 0.0077003290000000005, "time": 125.54181215763091}, "6b0c585f5c": {"quality": 0.21752136752136753, "cost": 0.006663018, "time": 122.58973615169526}, "6b3c16def2": {"quality": 0.18482905982905984, "cost": 0.001533927, "time": 51.53218412399292}, "6c05c47050": {"quality": 0.1987179487179487, "cost": 0.003248111999999999, "time": 96.47181532382965}, "6c3667811b": {"quality": 0.21474358974358973, "cost": 0.005244696, "time": 89.6854020357132}, "6cc813aa68": {"quality": 0.14807692307692308, "cost": 0.005938046000000001, "time": 58.51538195610046}, "6cd78cac7e": {"quality": 0.09444444444444444, "cost": 0.007387408, "time": 101.36276533603669}, "6d20c6ace0": {"quality": 0.21752136752136753, "cost": 0.024144216, "time": 121.96203644275664}, "6d67c56ba6": {"quality": 0.17307692307692307, "cost": 0.009393948, "time": 150.0689915418625}, "6db70dc3b6": {"quality": 0.04038461538461539, "cost": 0.0015085300000000001, "time": 94.44009244441986}, "6e0690f576": {"quality": 0.0, "cost": 0.001440486, "time": 
74.4156935930252}, "6e3db7ec5e": {"quality": 0.21752136752136753, "cost": 0.009058176, "time": 141.13843698501586}, "6e62bbb47f": {"quality": 0.15982905982905982, "cost": 0.0032870549999999997, "time": 139.53261284828187}, "6e859bfae6": {"quality": 0.05, "cost": 0.004340145, "time": 162.00480234622955}, "6e93514f45": {"quality": 0.0, "cost": 0.0047780819999999995, "time": 95.28718383312224}, "6eae47102b": {"quality": 0.21752136752136753, "cost": 0.008071342, "time": 93.64182848930359}, "6ecf93c479": {"quality": 0.17307692307692307, "cost": 0.0028209449999999996, "time": 103.17148315906525}, "6ef3b7127e": {"quality": 0.21752136752136753, "cost": 0.006768256, "time": 68.10584411621093}, "6f323f80c7": {"quality": 0.0, "cost": 0.00037804, "time": 104.40542347431182}, "6f60a05c33": {"quality": 0.15833333333333333, "cost": 0.0037825470000000003, "time": 92.0291631937027}, "6fbdd8b57c": {"quality": 0.1, "cost": 0.002123712, "time": 95.29761900901795}, "6fd6046c4b": {"quality": 0.18333333333333332, "cost": 0.005121717, "time": 132.3336772441864}, "6fe0b3f929": {"quality": 0.0, "cost": 0.006223385999999999, "time": 68.93117754459381}, "6ff4f667f8": {"quality": 0.19252136752136753, "cost": 0.006652730000000001, "time": 87.94405431747437}, "700474dfbd": {"quality": 0.15833333333333333, "cost": 0.0028382069999999997, "time": 102.84161474704743}, "700ab1d309": {"quality": 0.023076923076923078, "cost": 0.017122988, "time": 117.11154954433441}, "7040e83d52": {"quality": 0.24166666666666664, "cost": 0.019616124999999998, "time": 101.78968648910522}, "704209377f": {"quality": 0.0876068376068376, "cost": 0.005003547, "time": 138.1696399450302}, "70b666e371": {"quality": 0.18482905982905984, "cost": 0.004856766, "time": 104.8691722393036}, "70c850e039": {"quality": 0.18333333333333332, "cost": 0.002363835, "time": 67.29307141304017}, "7112a7e64c": {"quality": 0.25085470085470085, "cost": 0.0058677550000000005, "time": 63.623961353302}, "7114013f0c": {"quality": 0.16538461538461538, 
"cost": 0.0022517069999999995, "time": 125.79093027114868}, "715070d0ca": {"quality": 0.025, "cost": 0.006732683999999999, "time": 135.16606471538546}, "71b615468b": {"quality": 0.09252136752136753, "cost": 0.010796074, "time": 94.77488939762115}, "71ed893462": {"quality": 0.19444444444444442, "cost": 0.011514972000000002, "time": 137.31028594970704}, "722d41b2f8": {"quality": 0.125, "cost": 0.006919008000000001, "time": 110.66710319519044}, "723fd5589a": {"quality": 0.125, "cost": 0.012675105000000002, "time": 105.89904611110688}, "7250da0f41": {"quality": 0.0, "cost": 0.0072467339999999995, "time": 74.25018970966339}, "7260a96349": {"quality": 0.10641025641025642, "cost": 0.006264144, "time": 160.2630994796753}, "7347cf0308": {"quality": 0.0, "cost": 0.003343845, "time": 72.2089759349823}, "736e652158": {"quality": 0.0, "cost": 0.005920191, "time": 146.5679278612137}, "738364d6a2": {"quality": 0.0, "cost": 0.009448194, "time": 159.01038644313812}, "739b1f81dc": {"quality": 0.1987179487179487, "cost": 0.0017973389999999998, "time": 73.72460424900055}, "73c6240c29": {"quality": 0.1, "cost": 0.005322242999999999, "time": 159.0038095474243}, "73fc2767b9": {"quality": 0.2064102564102564, "cost": 0.0037065989999999997, "time": 126.9765938282013}, "7445d99939": {"quality": 0.1814102564102564, "cost": 0.007021181999999999, "time": 111.21800615787507}, "7466a5f424": {"quality": 0.3175213675213675, "cost": 0.015386124, "time": 150.73878495693208}, "74cc4b1bc4": {"quality": 0.04038461538461539, "cost": 0.006254895, "time": 117.36351308822631}, "74d7f64b8c": {"quality": 0.0, "cost": 0.006549831000000001, "time": 151.8392071723938}, "751869cbec": {"quality": 0.015384615384615385, "cost": 0.007969879, "time": 134.38790624141694}, "7524905580": {"quality": 0.2098290598290598, "cost": 0.004221619, "time": 93.99037828445435}, "752d9649f2": {"quality": 0.25085470085470085, "cost": 0.01328176, "time": 121.08108768463134}, "7558c9722d": {"quality": 0.2064102564102564, "cost": 
0.008134605999999999, "time": 120.57110471725464}, "75ca9cd4f8": {"quality": 0.1876068376068376, "cost": 0.0004974599999999999, "time": 68.25030062198638}, "7604c0aa13": {"quality": 0.015384615384615385, "cost": 0.010546463999999998, "time": 90.40108380317687}, "765dbc6ad5": {"quality": 0.21752136752136753, "cost": 0.003435049, "time": 94.19224355220794}, "7707e6e7e3": {"quality": 0.11474358974358972, "cost": 0.010274625999999999, "time": 94.13086493015288}, "7765576286": {"quality": 0.2098290598290598, "cost": 0.008545683, "time": 127.77431318759919}, "77983b6105": {"quality": 0.0, "cost": 0.002284218, "time": 68.26882412433625}, "77b5740025": {"quality": 0.015384615384615385, "cost": 0.006498776, "time": 106.45389134883881}, "77c02b00c1": {"quality": 0.257051282051282, "cost": 0.010423473999999999, "time": 126.61633729934692}, "77c6a9703a": {"quality": 0.2098290598290598, "cost": 0.007362192, "time": 108.2735918521881}, "77f293b737": {"quality": 0.21752136752136753, "cost": 0.009682152999999999, "time": 136.6608712911606}, "7801da66b9": {"quality": 0.0, "cost": 0.004307142, "time": 92.96408789157867}, "7862ea67cb": {"quality": 0.15149572649572648, "cost": 0.0014619539999999997, "time": 74.10243089199066}, "786e5d0af5": {"quality": 0.22094017094017093, "cost": 0.009905354999999998, "time": 129.44383957386017}, "795d119bc7": {"quality": 0.0, "cost": 0.006895664000000001, "time": 81.85152049064637}, "7989343d94": {"quality": 0.11666666666666667, "cost": 0.0030767639999999996, "time": 104.29451589584352}, "79fad58f07": {"quality": 0.21752136752136753, "cost": 0.002502802, "time": 51.48524146080017}, "7a207b42a8": {"quality": 0.023076923076923078, "cost": 0.008630430000000001, "time": 131.3380735397339}, "7a58d3472b": {"quality": 0.06752136752136753, "cost": 0.012301829, "time": 96.64494693279266}, "7a7cc658c8": {"quality": 0.06944444444444445, "cost": 0.009218845, "time": 130.32710280418394}, "7b024a2966": {"quality": 0.14807692307692308, "cost": 
0.009636592999999999, "time": 127.10254328250885}, "7b6dc3702e": {"quality": 0.11752136752136753, "cost": 0.01032963, "time": 98.65728087425232}, "7b6f44618e": {"quality": 0.21752136752136753, "cost": 0.012669284, "time": 119.66786665916442}, "7b74b23910": {"quality": 0.06538461538461539, "cost": 0.012630904, "time": 93.18642947673797}, "7b9cc96081": {"quality": 0.14807692307692308, "cost": 0.000405328, "time": 79.7800312757492}, "7c45c61d8d": {"quality": 0.21752136752136753, "cost": 0.006698508, "time": 122.86696994304657}, "7c89a2b69e": {"quality": 0.025, "cost": 0.012249355, "time": 127.1623036623001}, "7d44f0959d": {"quality": 0.21752136752136753, "cost": 0.006413139, "time": 93.95755279064178}, "7d60c38c5c": {"quality": 0.2098290598290598, "cost": 0.0017982599999999999, "time": 60.89626235961914}, "7d67e14414": {"quality": 0.11752136752136753, "cost": 0.0037651139999999995, "time": 123.35564484596253}, "7daf7ff182": {"quality": 0.13974358974358975, "cost": 0.011279536, "time": 93.00705258846284}, "7e0ad1c9c1": {"quality": 0.07371794871794872, "cost": 0.00508167, "time": 85.57242858409882}, "7ed07ad40a": {"quality": 0.2098290598290598, "cost": 0.00644937, "time": 84.29863061904908}, "7fa67a7656": {"quality": 0.07307692307692308, "cost": 0.007675500000000001, "time": 69.2791738986969}, "806881adcb": {"quality": 0.2064102564102564, "cost": 0.008654110999999999, "time": 86.79695003032684}, "80a1d9c2f3": {"quality": 0.15833333333333333, "cost": 0.005246394, "time": 105.42429778575897}, "80be7df955": {"quality": 0.0, "cost": 0.0014326019999999998, "time": 58.2230907201767}, "80bf60c422": {"quality": 0.16538461538461538, "cost": 0.001969395, "time": 61.00985796451569}, "81333c7a33": {"quality": 0.1987179487179487, "cost": 0.018448848000000004, "time": 86.57931089401245}, "813e75210b": {"quality": 0.23632478632478632, "cost": 0.002293942, "time": 33.89890332221985}, "815e7116df": {"quality": 0.041025641025641026, "cost": 0.00256722, "time": 102.2627355337143}, 
"816068ff07": {"quality": 0.15833333333333333, "cost": 0.0041238989999999994, "time": 127.76587257385255}, "81a4f42fd9": {"quality": 0.21752136752136753, "cost": 0.005694090000000001, "time": 59.030240178108215}, "828ccea2d3": {"quality": 0.04038461538461539, "cost": 0.008288664, "time": 109.34065868854523}, "829df73946": {"quality": 0.2098290598290598, "cost": 0.002660831, "time": 58.90988037586212}, "831728b179": {"quality": 0.0, "cost": 0.005039063999999999, "time": 71.040651845932}, "831e8b8be5": {"quality": 0.1987179487179487, "cost": 0.002362275, "time": 99.75399866104127}, "8357183895": {"quality": 0.0, "cost": 0.008517975, "time": 130.41329088211057}, "8392a6083a": {"quality": 0.2098290598290598, "cost": 0.013261482, "time": 129.61621696949004}, "83b244c163": {"quality": 0.2098290598290598, "cost": 0.0017275300000000001, "time": 63.67854707241058}, "83c9e66ec6": {"quality": 0.2237179487179487, "cost": 0.007498675, "time": 99.27389492988587}, "842c0d1062": {"quality": 0.20705128205128204, "cost": 0.005142987, "time": 166.0325870513916}, "846bed2aa7": {"quality": 0.24252136752136752, "cost": 0.006434445, "time": 98.26402361392975}, "847fd49235": {"quality": 0.17863247863247866, "cost": 0.004663961999999999, "time": 112.9008763551712}, "84dc98be95": {"quality": 0.19444444444444442, "cost": 0.007237588999999999, "time": 108.35145225524903}, "8519bef585": {"quality": 0.015384615384615385, "cost": 0.0024311339999999997, "time": 63.10895071029663}, "8572c6af3a": {"quality": 0.22863247863247865, "cost": 0.005362141000000001, "time": 159.52487354278566}, "85c94a5505": {"quality": 0.05, "cost": 0.0025202259999999995, "time": 34.404055738449095}, "85e8eaed6e": {"quality": 0.06538461538461539, "cost": 0.004890077999999999, "time": 158.11657013893125}, "862183bfb9": {"quality": 0.21752136752136753, "cost": 0.007488814999999999, "time": 120.0406931400299}, "8668f65f05": {"quality": 0.21752136752136753, "cost": 0.009651689000000001, "time": 144.70214662551882}, 
"870e2f87b4": {"quality": 0.21752136752136753, "cost": 0.013074957000000002, "time": 150.40512261390685}, "87c1b31c82": {"quality": 0.15982905982905982, "cost": 0.011328134, "time": 152.22725927829742}, "88436e05a9": {"quality": 0.09252136752136753, "cost": 0.010687336, "time": 153.95657515525818}, "887ad124e1": {"quality": 0.1987179487179487, "cost": 0.0030405569999999997, "time": 113.64234898090362}, "8886cb3082": {"quality": 0.2098290598290598, "cost": 0.009527662999999999, "time": 144.03201706409453}, "8940398bf1": {"quality": 0.04038461538461539, "cost": 0.001635585, "time": 86.64000837802887}, "8941621423": {"quality": 0.1737179487179487, "cost": 0.006337113, "time": 113.57799446582794}, "8961e4d901": {"quality": 0.0, "cost": 0.007177176, "time": 104.54208600521088}, "8974aa89a0": {"quality": 0.2098290598290598, "cost": 0.001461674, "time": 45.66577224731445}, "89a289907e": {"quality": 0.25085470085470085, "cost": 0.008718126999999999, "time": 113.69486925601959}, "89a35a09b1": {"quality": 0.21752136752136753, "cost": 0.007729346999999999, "time": 109.72029886245727}, "89bc21961a": {"quality": 0.18482905982905984, "cost": 0.000487962, "time": 54.24717800617218}, "89fbefd150": {"quality": 0.23974358974358972, "cost": 0.00825181, "time": 111.73369183540345}, "8a37c82283": {"quality": 0.23974358974358972, "cost": 0.007666522, "time": 83.14605205059051}, "8aaadb8649": {"quality": 0.025, "cost": 0.006027930000000001, "time": 66.97984294891357}, "8acd758b7f": {"quality": 0.2098290598290598, "cost": 0.004582077, "time": 86.44987049102784}, "8b721bbc6f": {"quality": 0.21752136752136753, "cost": 0.007828845000000001, "time": 117.83376302719117}, "8b90f4b639": {"quality": 0.0, "cost": 0.00041072999999999994, "time": 49.55189027786255}, "8bbbe0f52a": {"quality": 0.1814102564102564, "cost": 0.006133866, "time": 83.5189700126648}, "8bc184f385": {"quality": 0.1702991452991453, "cost": 0.007205481, "time": 86.5741204738617}, "8bf5c3eadc": {"quality": 0.025, "cost": 
0.006170141999999999, "time": 83.84268100261687}, "8c195addc7": {"quality": 0.16944444444444445, "cost": 0.01389775, "time": 90.39201626777648}, "8c9881972c": {"quality": 0.0626068376068376, "cost": 0.0028528439999999998, "time": 86.04524366855621}, "8cf8b81d84": {"quality": 0.125, "cost": 0.0008481899999999999, "time": 79.2455335855484}, "8d79e03266": {"quality": 0.21752136752136753, "cost": 0.008664971, "time": 108.97852082252503}, "8d90814b94": {"quality": 0.21752136752136753, "cost": 0.009054151, "time": 130.47983191013338}, "8e33fac90f": {"quality": 0.30982905982905984, "cost": 0.010998730000000002, "time": 130.90265057086944}, "8e5842ccbd": {"quality": 0.14038461538461539, "cost": 0.003239019, "time": 70.15636944770813}, "8f4caddfe6": {"quality": 0.21752136752136753, "cost": 0.014952506, "time": 105.19670691490174}, "8f4edde3f0": {"quality": 0.3175213675213675, "cost": 0.017418426, "time": 126.13828411102295}, "900a58f984": {"quality": 0.2064102564102564, "cost": 0.006433365999999999, "time": 98.91697227954865}, "9025e2480f": {"quality": 0.1987179487179487, "cost": 0.006619497, "time": 78.82729182243347}, "9028588af4": {"quality": 0.3175213675213675, "cost": 0.00501719, "time": 37.611924695968625}, "9059fd80ad": {"quality": 0.04038461538461539, "cost": 0.0018876219999999998, "time": 75.3997132062912}, "90d5e40c1b": {"quality": 0.0, "cost": 0.006230459999999999, "time": 77.66650586128236}, "90d9a86a2a": {"quality": 0.0, "cost": 0.001509345, "time": 48.08298280239106}, "90ed9312e1": {"quality": 0.16944444444444445, "cost": 0.008080153999999999, "time": 122.39040281772614}, "90ff13783c": {"quality": 0.24316239316239316, "cost": 0.010701109, "time": 137.6493337869644}, "90ff8eb055": {"quality": 0.0702991452991453, "cost": 0.009148667999999999, "time": 137.7864047050476}, "9104e31369": {"quality": 0.06944444444444445, "cost": 0.012645782999999999, "time": 140.74590210914613}, "918983323f": {"quality": 0.12307692307692308, "cost": 0.0011798999999999998, "time": 
42.641357612609866}, "91beb0cac1": {"quality": 0.125, "cost": 0.000758454, "time": 83.41916146278382}, "91c800af6b": {"quality": 0.04038461538461539, "cost": 0.011276811, "time": 143.70840940475466}, "91dd8884db": {"quality": 0.20705128205128204, "cost": 0.005775448000000001, "time": 141.27923700809478}, "924f128b3c": {"quality": 0.21752136752136753, "cost": 0.013985312, "time": 127.0183181285858}, "9288642e53": {"quality": 0.1, "cost": 0.006031014000000001, "time": 71.42156167030335}, "92c4137fb1": {"quality": 0.25085470085470085, "cost": 0.006908012999999999, "time": 145.73949379920958}, "92c9dcd43b": {"quality": 0.21752136752136753, "cost": 0.008693151, "time": 141.44435067176818}, "92f45e5cc7": {"quality": 0.23974358974358972, "cost": 0.0006816839999999999, "time": 69.65455045700074}, "93011c0821": {"quality": 0.16752136752136754, "cost": 0.012118556999999999, "time": 129.1434654712677}, "9303149ba4": {"quality": 0.04807692307692308, "cost": 0.007609685, "time": 148.0766399145126}, "933b4d17dd": {"quality": 0.22863247863247865, "cost": 0.004478119000000001, "time": 98.24304263591766}, "94010928c6": {"quality": 0.1814102564102564, "cost": 0.006916301999999999, "time": 109.81094932556152}, "9403809e44": {"quality": 0.21752136752136753, "cost": 0.011799824, "time": 132.59149780273435}, "943baaea0c": {"quality": 0.17307692307692307, "cost": 0.011566795000000001, "time": 117.94117500782014}, "94569f177a": {"quality": 0.015384615384615385, "cost": 0.002410395, "time": 125.42731764316558}, "9466542023": {"quality": 0.03333333333333333, "cost": 0.0057622739999999995, "time": 136.34905714988707}, "947e28ef2e": {"quality": 0.06538461538461539, "cost": 0.0031674240000000003, "time": 124.9436069726944}, "94ac356663": {"quality": 0.1737179487179487, "cost": 0.007041248, "time": 104.0579169511795}, "94dff9a424": {"quality": 0.1987179487179487, "cost": 0.005677325, "time": 105.50971393585205}, "9508356a2e": {"quality": 0.14807692307692308, "cost": 0.006912872999999999, 
"time": 129.4876657009125}, "956bdcc254": {"quality": 0.2064102564102564, "cost": 0.015445604, "time": 125.50174486637115}, "9594b0c783": {"quality": 0.2098290598290598, "cost": 0.003143199, "time": 109.28865358829498}, "964c671f18": {"quality": 0.2098290598290598, "cost": 0.011780327, "time": 147.56322202682497}, "9679fe2b69": {"quality": 0.4098290598290598, "cost": 0.003103125, "time": 112.40209789276122}, "968fc95038": {"quality": 0.21752136752136753, "cost": 0.00356376, "time": 109.65150537490845}, "96b487c724": {"quality": 0.07371794871794872, "cost": 0.0006113920000000001, "time": 69.84607322216033}, "96e85f9af4": {"quality": 0.058333333333333334, "cost": 0.0037689600000000005, "time": 115.5232797384262}, "96f87d6483": {"quality": 0.19444444444444442, "cost": 0.012450801, "time": 146.1588816165924}, "972c83b002": {"quality": 0.14807692307692308, "cost": 0.007372287, "time": 112.55022113323211}, "975bc44958": {"quality": 0.24871794871794872, "cost": 0.0024022649999999998, "time": 111.18246881961822}, "977a4d6b6b": {"quality": 0.33974358974358976, "cost": 0.013285368000000002, "time": 138.42745580673215}, "97ad4cd41a": {"quality": 0.025, "cost": 0.008055923000000001, "time": 147.88769648075103}, "97bc30bd83": {"quality": 0.19252136752136753, "cost": 0.010554706, "time": 107.51760149002075}, "97e1d0db92": {"quality": 0.06538461538461539, "cost": 0.004098666000000001, "time": 98.48549189567566}, "981da9ba40": {"quality": 0.09871794871794873, "cost": 0.008754687, "time": 150.4159719467163}, "98c1ea89f3": {"quality": 0.1987179487179487, "cost": 0.004756095, "time": 144.49644501209258}, "98eca2c65c": {"quality": 0.2098290598290598, "cost": 0.007576088, "time": 113.79488031864167}, "98ecf1a157": {"quality": 0.2098290598290598, "cost": 0.0006460169999999999, "time": 66.48974347114563}, "9927dc270b": {"quality": 0.22863247863247865, "cost": 0.008019299, "time": 142.80323901176453}, "99546d91e4": {"quality": 0.2098290598290598, "cost": 0.0023279909999999997, "time": 
109.73371806144715}, "99cb0ba736": {"quality": 0.0, "cost": 0.001753854, "time": 80.39895787239075}, "99e44ab9b2": {"quality": 0.2064102564102564, "cost": 0.007417979999999999, "time": 142.21234567165374}, "9a0145c9b5": {"quality": 0.07371794871794872, "cost": 0.003074034, "time": 115.01807222366332}, "9a57ea3f89": {"quality": 0.1952991452991453, "cost": 0.007409801999999998, "time": 133.90688972473146}, "9a8420a0b3": {"quality": 0.0, "cost": 0.0068175779999999995, "time": 152.5392238378525}, "9aa32e6c96": {"quality": 0.21752136752136753, "cost": 0.01103525, "time": 141.08924725055692}, "9aa4abfb50": {"quality": 0.0, "cost": 0.0034209360000000003, "time": 89.89876456260681}, "9ada932bf5": {"quality": 0.21752136752136753, "cost": 0.008047750999999999, "time": 144.1339359998703}, "9b6d4915f3": {"quality": 0.0, "cost": 0.00463566, "time": 54.38807971477509}, "9bae5bafc1": {"quality": 0.18333333333333332, "cost": 0.013702148, "time": 98.45531895160676}, "9c549db0a7": {"quality": 0.0, "cost": 0.010721709000000001, "time": 79.0128826379776}, "9c595a2bc9": {"quality": 0.025, "cost": 0.007724396, "time": 148.95668649673462}, "9c85f8cfcb": {"quality": 0.015384615384615385, "cost": 0.0023850119999999997, "time": 67.28472025394439}, "9c8cc46e6c": {"quality": 0.06538461538461539, "cost": 0.002001585, "time": 77.71750602722167}, "9c97d35a30": {"quality": 0.2098290598290598, "cost": 0.005077656, "time": 118.16123571395875}, "9ca354a53e": {"quality": 0.0, "cost": 0.003021384, "time": 101.3728716135025}, "9ce2c3fd98": {"quality": 0.0, "cost": 0.0047293560000000005, "time": 129.50395069122314}, "9d18cd0737": {"quality": 0.07307692307692308, "cost": 0.004835114999999999, "time": 82.27081375122071}, "9d7142e7b4": {"quality": 0.16944444444444445, "cost": 0.008467907, "time": 147.60730669498443}, "9d778daa24": {"quality": 0.03333333333333333, "cost": 0.007418613, "time": 153.25245223045349}, "9e06360bc9": {"quality": 0.2098290598290598, "cost": 0.008218711, "time": 113.20028014183045}, 
"9fb157be35": {"quality": 0.1, "cost": 0.008187586, "time": 116.65422949790954}, "9fc44fdeb1": {"quality": 0.0, "cost": 0.009764671999999999, "time": 163.913742351532}, "9ffaa26d5a": {"quality": 0.21752136752136753, "cost": 0.006876598000000001, "time": 107.02223331928253}, "a041e7777a": {"quality": 0.17649572649572648, "cost": 0.0054005519999999994, "time": 107.31747977733612}, "a0b81be5b4": {"quality": 0.21752136752136753, "cost": 0.002235789, "time": 84.43192894458771}, "a0c85d260e": {"quality": 0.21752136752136753, "cost": 0.013538011000000003, "time": 134.37746741771696}, "a0dc9f50ac": {"quality": 0.15833333333333333, "cost": 0.0058347719999999985, "time": 73.68754653930664}, "a14c507393": {"quality": 0.0, "cost": 0.0016790159999999998, "time": 85.27930269241332}, "a1881eb481": {"quality": 0.23974358974358972, "cost": 0.006974355, "time": 158.60932910442352}, "a1bb32e6a1": {"quality": 0.04038461538461539, "cost": 0.0019489619999999998, "time": 113.17418549060821}, "a2347e8e9e": {"quality": 0.08482905982905983, "cost": 0.003918969, "time": 112.96018514633178}, "a2aa082d14": {"quality": 0.0, "cost": 0.012548850000000002, "time": 97.43302309513092}, "a2cd339ad9": {"quality": 0.09252136752136753, "cost": 0.009654009, "time": 161.78442559242248}, "a2fd03e6a5": {"quality": 0.1876068376068376, "cost": 0.004650046, "time": 46.97034850120544}, "a31e87d7cb": {"quality": 0.2064102564102564, "cost": 0.00146718, "time": 73.75970520973206}, "a344b2d79a": {"quality": 0.2098290598290598, "cost": 0.008522825, "time": 167.94721865653992}, "a3c0ea3342": {"quality": 0.3175213675213675, "cost": 0.007620170000000001, "time": 166.26401495933533}, "a456d75fef": {"quality": 0.0, "cost": 0.003897582, "time": 149.68703374862673}, "a457f6c300": {"quality": 0.0, "cost": 0.007946964, "time": 158.9001291036606}, "a4767e7679": {"quality": 0.0, "cost": 0.002210106, "time": 111.21880123615264}, "a47de025c8": {"quality": 0.0, "cost": 0.003860679, "time": 106.15438146591185}, "a4ad96343d": 
{"quality": 0.23696581196581196, "cost": 0.009205054, "time": 161.23487601280215}, "a515a9c8cc": {"quality": 0.16944444444444445, "cost": 0.0021845939999999998, "time": 90.79483761787415}, "a5949b76ec": {"quality": 0.0, "cost": 0.0022742099999999996, "time": 77.40827825069428}, "a5ae4dfe66": {"quality": 0.1, "cost": 0.006415026, "time": 117.52768676280975}, "a60dd076b8": {"quality": 0.1564102564102564, "cost": 0.0024921179999999998, "time": 99.6796523809433}, "a6297a6c56": {"quality": 0.125, "cost": 0.0019724219999999997, "time": 83.12536749839782}, "a6460dbb7c": {"quality": 0.2098290598290598, "cost": 0.002657791, "time": 71.54174087047576}, "a6796ed686": {"quality": 0.09444444444444444, "cost": 0.0032055929999999996, "time": 130.10447964668273}, "a6d2b05ec8": {"quality": 0.2098290598290598, "cost": 0.008059675, "time": 133.3959671497345}, "a717c4c535": {"quality": 0.11752136752136753, "cost": 0.012759294000000001, "time": 123.11250400543213}, "a721cd9ebf": {"quality": 0.21752136752136753, "cost": 0.006374966000000001, "time": 93.21334941387177}, "a8090787b1": {"quality": 0.2098290598290598, "cost": 0.004406745, "time": 124.60862724781036}, "a854343d46": {"quality": 0.225, "cost": 0.0063598100000000005, "time": 87.96014966964722}, "a86b137d7f": {"quality": 0.15, "cost": 0.0021869159999999997, "time": 58.69887585639954}, "a88eb1493c": {"quality": 0.0952991452991453, "cost": 0.004839059999999999, "time": 60.44982805252076}, "a88fb984e3": {"quality": 0.21752136752136753, "cost": 0.008328870999999998, "time": 131.90409784317018}, "a94e2e5f57": {"quality": 0.04807692307692308, "cost": 0.006114, "time": 102.40570263862611}, "a95b4a6dd0": {"quality": 0.0452991452991453, "cost": 0.008242788000000001, "time": 101.27096877098083}, "a9621ea4e6": {"quality": 0.22863247863247865, "cost": 0.00707055, "time": 100.2644911289215}, "a96e22379d": {"quality": 0.13482905982905985, "cost": 0.004674597, "time": 135.9614454984665}, "a9721a0a50": {"quality": 0.18482905982905984, "cost": 
0.006856040000000001, "time": 74.35422718524933}, "aa08180e36": {"quality": 0.21752136752136753, "cost": 0.009903308, "time": 141.78917632102966}, "aa38702a02": {"quality": 0.058333333333333334, "cost": 0.004223736, "time": 108.32900211811065}, "aa8187c023": {"quality": 0.0, "cost": 0.006725136, "time": 115.54149260520936}, "aadbfc418b": {"quality": 0.04807692307692308, "cost": 0.003954492, "time": 115.65469679832458}, "ab1c706436": {"quality": 0.09871794871794873, "cost": 0.007816556999999998, "time": 157.3217898607254}, "ab43b02cb0": {"quality": 0.0, "cost": 0.010739148, "time": 88.10484538078308}, "aba21780bc": {"quality": 0.09594017094017093, "cost": 0.001212108, "time": 68.86885101795197}, "abbca95f00": {"quality": 0.1841880341880342, "cost": 0.00179952, "time": 77.29247143268586}, "ac208e7a1d": {"quality": 0.21752136752136753, "cost": 0.009940174000000001, "time": 106.78546745777129}, "ac7fcf90e2": {"quality": 0.059829059829059825, "cost": 0.008491644, "time": 163.5559736728668}, "ac828ffe70": {"quality": 0.025, "cost": 0.000761398, "time": 72.60451011657715}, "ac9fdc1550": {"quality": 0.2098290598290598, "cost": 0.012285289000000001, "time": 112.57146043777465}, "ad328d5108": {"quality": 0.1611111111111111, "cost": 0.001292802, "time": 134.6257879257202}, "ad3efe44c3": {"quality": 0.0, "cost": 0.002330118, "time": 83.25956127643585}, "ad48432c22": {"quality": 0.0, "cost": 0.009617565, "time": 165.95421760082246}, "ad5187a390": {"quality": 0.22863247863247865, "cost": 0.010695554, "time": 160.50709626674652}, "ad6ebbba8d": {"quality": 0.16752136752136754, "cost": 0.0075785209999999995, "time": 114.89612691402435}, "ad90055ef6": {"quality": 0.0, "cost": 0.003493998, "time": 103.62199125289916}, "ad97c5cee6": {"quality": 0.15149572649572648, "cost": 0.0022562339999999998, "time": 137.5054316520691}, "adab1e0fb1": {"quality": 0.3175213675213675, "cost": 0.00585581, "time": 77.00263237953186}, "ae655ec593": {"quality": 0.11666666666666667, "cost": 0.003635154, 
"time": 112.42764747142792}, "ae94b172be": {"quality": 0.1987179487179487, "cost": 0.007214128000000001, "time": 117.53457653522491}, "af360c323c": {"quality": 0.0, "cost": 0.0032059769999999996, "time": 116.28576538562774}, "af90567194": {"quality": 0.21752136752136753, "cost": 0.013109858999999998, "time": 144.84608309268953}, "b0156bb6d2": {"quality": 0.25085470085470085, "cost": 0.011064174, "time": 144.33307461738588}, "b03c31ca45": {"quality": 0.09871794871794873, "cost": 0.007032179999999999, "time": 159.58066523075104}, "b0530b98c3": {"quality": 0.08333333333333334, "cost": 0.007914135, "time": 151.2534171819687}, "b0948c05b6": {"quality": 0.16538461538461538, "cost": 0.011687544000000001, "time": 77.90738432407379}, "b18168b9c1": {"quality": 0.1952991452991453, "cost": 0.007746042, "time": 102.36798930168152}, "b1cf8d33e5": {"quality": 0.0702991452991453, "cost": 0.007221741, "time": 96.63562579154969}, "b1dcd7aa24": {"quality": 0.2098290598290598, "cost": 0.009499193, "time": 142.64096357822416}, "b214718d07": {"quality": 0.16538461538461538, "cost": 0.002830191, "time": 105.70124731063842}, "b28925e4b8": {"quality": 0.06538461538461539, "cost": 0.011244171, "time": 117.42571413516998}, "b2b057ba41": {"quality": 0.21752136752136753, "cost": 0.0036646409999999997, "time": 104.08598182201385}, "b2e063499d": {"quality": 0.21752136752136753, "cost": 0.008730451, "time": 99.22217960357665}, "b3369775dc": {"quality": 0.2064102564102564, "cost": 0.012187132, "time": 128.45301840305328}, "b35bf038c2": {"quality": 0.14038461538461539, "cost": 0.0027645659999999996, "time": 118.8479066848755}, "b363b25367": {"quality": 0.10982905982905983, "cost": 0.00807849, "time": 150.05790014266967}, "b3decd5c2f": {"quality": 0.14722222222222223, "cost": 0.0054683579999999996, "time": 114.51114053726197}, "b3f20b706d": {"quality": 0.025, "cost": 0.002810052, "time": 90.74897117614745}, "b4002173ee": {"quality": 0.10641025641025642, "cost": 0.00532104, "time": 71.4259075164795}, 
"b45fc30d81": {"quality": 0.015384615384615385, "cost": 0.007027874999999999, "time": 148.3045286655426}, "b4a259f6dd": {"quality": 0.2098290598290598, "cost": 0.00846508, "time": 139.42276346683502}, "b52cdb3c6d": {"quality": 0.25085470085470085, "cost": 0.001953663, "time": 101.56977760791779}, "b56c312eda": {"quality": 0.21752136752136753, "cost": 0.008759253, "time": 135.296657371521}, "b5a02bb8ab": {"quality": 0.09252136752136753, "cost": 0.005621411999999999, "time": 126.08018836975097}, "b5e2b41c1c": {"quality": 0.1987179487179487, "cost": 0.004181511, "time": 97.78416235446929}, "b64ddb14f9": {"quality": 0.21752136752136753, "cost": 0.002377122, "time": 40.39987435340882}, "b66118d5f2": {"quality": 0.04038461538461539, "cost": 0.007001360999999999, "time": 141.72445845603943}, "b67107a43e": {"quality": 0.0, "cost": 0.005282744999999999, "time": 134.97200748920443}, "b682a23b89": {"quality": 0.2098290598290598, "cost": 0.004507057, "time": 94.8906276702881}, "b690a1ddd6": {"quality": 0.0, "cost": 0.004254936000000001, "time": 132.1497477531433}, "b796b7ffd3": {"quality": 0.21752136752136753, "cost": 0.008754787, "time": 134.97328844070432}, "b7d0e8557f": {"quality": 0.20982905982905983, "cost": 0.007772019, "time": 136.01008360385896}, "b7f203a0bf": {"quality": 0.09871794871794873, "cost": 0.0074011649999999995, "time": 142.89838807582856}, "b81d5b2bd9": {"quality": 0.04807692307692308, "cost": 0.008468160999999998, "time": 139.87127735614774}, "b8343f05e1": {"quality": 0.04722222222222222, "cost": 0.0071142839999999985, "time": 114.42319309711456}, "b8ab3d2f25": {"quality": 0.21752136752136753, "cost": 0.008466568, "time": 101.01810977458953}, "b8b569172f": {"quality": 0.10705128205128206, "cost": 0.010024281999999999, "time": 115.06630852222443}, "b8b91e375d": {"quality": 0.21752136752136753, "cost": 0.012645420000000001, "time": 146.17070028781893}, "b8c685904d": {"quality": 0.0, "cost": 0.0017556779999999999, "time": 98.82595324516296}, "b8d1903276": 
{"quality": 0.025, "cost": 0.004277349, "time": 116.25438375473021}, "b91e7fdb29": {"quality": 0.0, "cost": 0.009623747999999998, "time": 172.33287427425387}, "b9770c2261": {"quality": 0.24871794871794872, "cost": 0.0018192599999999998, "time": 68.26051120758056}, "b9bb1e6f8d": {"quality": 0.21752136752136753, "cost": 0.012109797999999998, "time": 153.9570437669754}, "b9da208432": {"quality": 0.22094017094017093, "cost": 0.009226414999999998, "time": 152.71056313514708}, "ba3223f6ac": {"quality": 0.17307692307692307, "cost": 0.003906612, "time": 117.72781562805176}, "bac3d23c31": {"quality": 0.21752136752136753, "cost": 0.011714114000000001, "time": 149.72536618709563}, "bb3ee18de1": {"quality": 0.2064102564102564, "cost": 0.00687103, "time": 112.67253947257996}, "bb6536b0ab": {"quality": 0.12585470085470085, "cost": 0.010593946, "time": 116.58704767227172}, "bb70f60bf1": {"quality": 0.1952991452991453, "cost": 0.0029260320000000003, "time": 113.47788968086243}, "bbba9dd6ae": {"quality": 0.3175213675213675, "cost": 0.005866453000000001, "time": 81.5745332479477}, "bbfba2f2ee": {"quality": 0.0, "cost": 0.0043119479999999995, "time": 144.39048359394076}, "bc3d02f753": {"quality": 0.0, "cost": 0.002622999, "time": 100.0285652399063}, "bc4c1fcc64": {"quality": 0.21752136752136753, "cost": 0.005892321000000001, "time": 80.69622204303741}, "bc60556255": {"quality": 0.025, "cost": 0.000541426, "time": 90.43072290420533}, "bcae7c2fc4": {"quality": 0.0952991452991453, "cost": 0.006106959, "time": 118.90915808677673}, "bcef42e3b0": {"quality": 0.20705128205128204, "cost": 0.003036438, "time": 126.3029891014099}, "bcf4bf7c35": {"quality": 0.09252136752136753, "cost": 0.010682502, "time": 163.63021724224092}, "bcfb273436": {"quality": 0.2675213675213675, "cost": 0.005338022999999999, "time": 154.81484963893888}, "bd99b2fb21": {"quality": 0.21752136752136753, "cost": 0.00784732, "time": 116.22147076129913}, "bdf497196b": {"quality": 0.21752136752136753, "cost": 0.00564954, 
"time": 81.09978458881378}, "be2ae88f70": {"quality": 0.0, "cost": 0.004892166, "time": 134.319625210762}, "be4740f38f": {"quality": 0.11752136752136753, "cost": 0.011724527999999998, "time": 111.63968887329102}, "bed888d4dc": {"quality": 0.12222222222222223, "cost": 0.007974548999999997, "time": 167.2181126832962}, "bf2a5d2680": {"quality": 0.08141025641025641, "cost": 0.005895564, "time": 116.7006804227829}, "bf7b0a8dc1": {"quality": 0.21752136752136753, "cost": 0.0072987239999999995, "time": 172.42680845260622}, "bfed7670ed": {"quality": 0.08482905982905983, "cost": 0.010263118, "time": 118.4290988445282}, "c06b118e65": {"quality": 0.20705128205128204, "cost": 0.005422905, "time": 111.81110198497773}, "c08a5ad170": {"quality": 0.19252136752136753, "cost": 0.008962954000000002, "time": 153.46751430034638}, "c0d53a20de": {"quality": 0.2098290598290598, "cost": 0.007020855, "time": 151.7084014415741}, "c10e588987": {"quality": 0.1814102564102564, "cost": 0.0070173779999999995, "time": 161.47480256557463}, "c127509a7a": {"quality": 0.30982905982905984, "cost": 0.012152782, "time": 114.87017834186554}, "c13682c7c7": {"quality": 0.21752136752136753, "cost": 0.011820473999999997, "time": 155.98555040359497}, "c13d6e78e9": {"quality": 0.2098290598290598, "cost": 0.015482613, "time": 135.19117062091829}, "c145482664": {"quality": 0.125, "cost": 0.011503632, "time": 157.4289677143097}, "c14ff3144d": {"quality": 0.15833333333333333, "cost": 0.001474128, "time": 73.17416946887971}, "c186182658": {"quality": 0.0702991452991453, "cost": 0.00651766, "time": 114.43175661563873}, "c263c65d0a": {"quality": 0.2098290598290598, "cost": 0.004897752, "time": 161.65783095359802}, "c2949aa902": {"quality": 0.2064102564102564, "cost": 0.006985964999999999, "time": 112.66284742355347}, "c31c9d4d8c": {"quality": 0.058333333333333334, "cost": 0.004036233, "time": 118.73210837841035}, "c31e956b35": {"quality": 0.10705128205128206, "cost": 0.008987883, "time": 156.68281931877135}, 
"c339463a25": {"quality": 0.11538461538461539, "cost": 0.0039407339999999996, "time": 142.9191876411438}, "c36b525dde": {"quality": 0.2098290598290598, "cost": 0.00295329, "time": 84.8311204433441}, "c3d20f33bf": {"quality": 0.15085470085470087, "cost": 0.009346874, "time": 155.06189365386962}, "c3ec2cec59": {"quality": 0.3175213675213675, "cost": 0.010893408, "time": 121.71417863368988}, "c443a2c1fb": {"quality": 0.09444444444444444, "cost": 0.008210058, "time": 158.09974863529203}, "c4a80d19b3": {"quality": 0.3175213675213675, "cost": 0.010017962999999998, "time": 152.05399761199953}, "c4c2826afd": {"quality": 0.14722222222222223, "cost": 0.008821047, "time": 150.96840312480924}, "c4c94a5527": {"quality": 0.14807692307692308, "cost": 0.007312788, "time": 118.44996173381804}, "c4f3e7665d": {"quality": 0.1, "cost": 0.004909806, "time": 122.77528305053711}, "c58e9652b7": {"quality": 0.03333333333333333, "cost": 0.004275492, "time": 163.37369763851166}, "c5a16b834a": {"quality": 0.24252136752136752, "cost": 0.009288249, "time": 145.43366010189055}, "c5fbe2076f": {"quality": 0.21752136752136753, "cost": 0.016178868, "time": 153.44052112102509}, "c6198f364e": {"quality": 0.21752136752136753, "cost": 0.008840571000000002, "time": 143.01735095977784}, "c691570715": {"quality": 0.025, "cost": 0.004222068, "time": 105.88032670021057}, "c691a29c42": {"quality": 0.06944444444444445, "cost": 0.014146588, "time": 136.24555165767669}, "c6a339987c": {"quality": 0.14807692307692308, "cost": 0.008871827000000002, "time": 124.18036108016967}, "c6a4d256ce": {"quality": 0.11752136752136753, "cost": 0.011863845, "time": 131.42284343242645}, "c76222087e": {"quality": 0.10982905982905983, "cost": 0.007390348000000001, "time": 108.10401859283448}, "c772ff3704": {"quality": 0.10982905982905983, "cost": 0.008256624, "time": 122.45812864303589}, "c7d4ff0c05": {"quality": 0.22863247863247865, "cost": 0.006689247, "time": 135.3987812280655}, "c823f7ab29": {"quality": 0.19252136752136753, 
"cost": 0.006112934, "time": 110.04468183517456}, "c82b926689": {"quality": 0.21752136752136753, "cost": 0.011001155000000002, "time": 132.11898975372316}, "c82f834e85": {"quality": 0.21752136752136753, "cost": 0.007510131000000001, "time": 88.01416339874268}, "c9320068f9": {"quality": 0.11474358974358972, "cost": 0.008706341999999999, "time": 138.78494324684144}, "c935a33384": {"quality": 0.23974358974358972, "cost": 0.013361947999999998, "time": 128.84306230545045}, "c99f3577c7": {"quality": 0.09444444444444444, "cost": 0.012227506, "time": 131.7928378343582}, "c9d32a0a82": {"quality": 0.1, "cost": 0.00772979, "time": 150.54008300304412}, "ca55c36c3f": {"quality": 0.2098290598290598, "cost": 0.002033097, "time": 109.44059422016144}, "caa7c0bd6b": {"quality": 0.1737179487179487, "cost": 0.007166354999999999, "time": 108.27636570930481}, "cac6b051e9": {"quality": 0.2098290598290598, "cost": 0.007765482000000001, "time": 140.63682539463042}, "cb19d631b2": {"quality": 0.2098290598290598, "cost": 0.0074525500000000005, "time": 109.26569464206696}, "cbb25fb322": {"quality": 0.19444444444444442, "cost": 0.004088256, "time": 146.00942890644075}, "cbb5eb0e74": {"quality": 0.2098290598290598, "cost": 0.007899888, "time": 146.89739501476288}, "cbc32cbeff": {"quality": 0.2098290598290598, "cost": 0.008663157000000001, "time": 144.39441390037535}, "cbd4461293": {"quality": 0.09871794871794873, "cost": 0.0019465299999999997, "time": 53.78504421710968}, "cbe2318045": {"quality": 0.18482905982905984, "cost": 0.00824068, "time": 99.53602702617644}, "cc20ebc768": {"quality": 0.13974358974358975, "cost": 0.008763393999999999, "time": 151.47476081848146}, "cc886fe337": {"quality": 0.05, "cost": 0.006118446, "time": 104.02940430641175}, "ccf72745c1": {"quality": 0.21752136752136753, "cost": 0.00881063, "time": 104.33854112625122}, "cd1d418732": {"quality": 0.0, "cost": 0.005609648, "time": 51.80708644390106}, "cd23c79db1": {"quality": 0.0, "cost": 0.008759461999999999, "time": 
130.19892246723174}, "cd64fbfcd9": {"quality": 0.02222222222222222, "cost": 0.006661377, "time": 132.06611769199372}, "cda3e2a4e9": {"quality": 0.19252136752136753, "cost": 0.003251412, "time": 130.11529834270476}, "cdc9ce922f": {"quality": 0.1876068376068376, "cost": 0.00361428, "time": 130.60064585208892}, "cdf0df2f51": {"quality": 0.16538461538461538, "cost": 0.005876955, "time": 170.43609290122987}, "ce281875b4": {"quality": 0.2098290598290598, "cost": 0.002271957, "time": 134.98369066715242}, "ce4bc5f348": {"quality": 0.04038461538461539, "cost": 0.0031947119999999997, "time": 97.7704703092575}, "ce980cf86f": {"quality": 0.125, "cost": 0.003918672, "time": 130.57520353794098}, "cecca90dd2": {"quality": 0.1987179487179487, "cost": 0.004469783999999999, "time": 136.02725315093994}, "cece83de2d": {"quality": 0.10982905982905983, "cost": 0.0060226260000000005, "time": 134.92675962448118}, "cf51e0a888": {"quality": 0.025, "cost": 0.007815034000000002, "time": 141.21634793281555}, "cf9538faf0": {"quality": 0.21752136752136753, "cost": 0.009526646, "time": 136.5719269990921}, "cf9d2e224c": {"quality": 0.11752136752136753, "cost": 0.005909796, "time": 82.14293384552002}, "cfd36f3a8c": {"quality": 0.2064102564102564, "cost": 0.015834602, "time": 133.97862401008604}, "cffa29a6ef": {"quality": 0.1987179487179487, "cost": 0.00053197, "time": 57.04082276821137}, "d03596c3de": {"quality": 0.23974358974358972, "cost": 0.007832440999999999, "time": 161.64306690692902}, "d0f9633442": {"quality": 0.0, "cost": 0.007817082, "time": 133.32561285495757}, "d192872a51": {"quality": 0.06944444444444445, "cost": 0.011960976000000002, "time": 151.55924673080443}, "d1d953cac7": {"quality": 0.023076923076923078, "cost": 0.0069616439999999995, "time": 129.82648131847384}, "d2164c8c4c": {"quality": 0.25085470085470085, "cost": 0.007942218000000001, "time": 106.56984751224518}, "d216eab7d8": {"quality": 0.23974358974358972, "cost": 0.008338906, "time": 110.89031755924225}, "d266c19ac8": 
{"quality": 0.18482905982905984, "cost": 0.005187306, "time": 137.3413758277893}, "d2af24b59e": {"quality": 0.19252136752136753, "cost": 0.008368941999999999, "time": 107.03122828006744}, "d3a2d50bd7": {"quality": 0.2098290598290598, "cost": 0.004220073, "time": 104.34660010337831}, "d3db4cf84d": {"quality": 0.0, "cost": 0.0043255260000000005, "time": 86.13068716526033}, "d40174fb0b": {"quality": 0.015384615384615385, "cost": 0.004637364, "time": 72.65830745697022}, "d43fafa19e": {"quality": 0.023076923076923078, "cost": 0.004984457999999999, "time": 69.79157807826996}, "d48ead13da": {"quality": 0.04807692307692308, "cost": 0.007356842000000001, "time": 108.29052205085753}, "d5016f4538": {"quality": 0.023076923076923078, "cost": 0.004924038, "time": 63.6337170124054}, "d55e983189": {"quality": 0.2098290598290598, "cost": 0.0023029619999999995, "time": 103.95245385169983}, "d573c2a414": {"quality": 0.04444444444444444, "cost": 0.003798615, "time": 146.4341695547104}, "d58036ba66": {"quality": 0.21752136752136753, "cost": 0.007977542, "time": 101.040083694458}, "d59bacbfe0": {"quality": 0.09252136752136753, "cost": 0.009014214999999999, "time": 152.89927747249604}, "d6040140b9": {"quality": 0.21752136752136753, "cost": 0.013529567000000001, "time": 147.0174176454544}, "d640edd7a7": {"quality": 0.2064102564102564, "cost": 0.0030975000000000004, "time": 118.49127612113952}, "d65185c1a4": {"quality": 0.2098290598290598, "cost": 0.0048733019999999995, "time": 101.63841800689698}, "d690b6d739": {"quality": 0.13760683760683762, "cost": 0.0024723749999999997, "time": 115.86975026130676}, "d6bd3b66ba": {"quality": 0.0, "cost": 0.0037659959999999998, "time": 111.267901968956}, "d6c4e48eeb": {"quality": 0.21752136752136753, "cost": 0.008788099, "time": 116.89563345909119}, "d6c60a5214": {"quality": 0.17307692307692307, "cost": 0.0012564660000000001, "time": 69.47750086784363}, "d6cbf265ee": {"quality": 0.06538461538461539, "cost": 0.00160378, "time": 72.91289446353912}, 
"d73a9aab4e": {"quality": 0.04871794871794872, "cost": 0.0032188139999999995, "time": 79.5758284330368}, "d752c30d07": {"quality": 0.3175213675213675, "cost": 0.015617612, "time": 127.57117984294892}, "d7c0972014": {"quality": 0.19444444444444442, "cost": 0.0105163, "time": 80.39488637447357}, "d7f6c0c9d4": {"quality": 0.0, "cost": 0.002123865, "time": 94.49595766067506}, "d813410e44": {"quality": 0.04807692307692308, "cost": 0.002987295, "time": 133.61272318363189}, "d87eb775da": {"quality": 0.2098290598290598, "cost": 0.008013359000000001, "time": 119.17614867687226}, "d8bab6c09b": {"quality": 0.07307692307692308, "cost": 0.011752513, "time": 155.0189305305481}, "d8bcac36e8": {"quality": 0.025, "cost": 0.010723482, "time": 115.70462930202484}, "d96677d8d4": {"quality": 0.21752136752136753, "cost": 0.006440180999999999, "time": 108.42673063278198}, "d9e2bb21a3": {"quality": 0.19594017094017094, "cost": 0.005149365, "time": 107.66626167297363}, "daaadadcc9": {"quality": 0.21752136752136753, "cost": 0.009858946, "time": 142.47570564746857}, "daf855e065": {"quality": 0.1987179487179487, "cost": 0.005835996000000001, "time": 153.1451871395111}, "db6f7259cd": {"quality": 0.17307692307692307, "cost": 0.007230853000000001, "time": 107.38680810928344}, "db9060cd27": {"quality": 0.025, "cost": 0.00183247, "time": 75.05918591022493}, "dbce95a072": {"quality": 0.10641025641025642, "cost": 0.0072439719999999996, "time": 107.68909630775451}, "dbe7d818fa": {"quality": 0.14444444444444443, "cost": 0.0020769660000000004, "time": 109.260413813591}, "dc66bccb1c": {"quality": 0.2098290598290598, "cost": 0.00518188, "time": 101.09860997200012}, "dc90065dea": {"quality": 0.11752136752136753, "cost": 0.010587499, "time": 108.61172733306884}, "dc9a912501": {"quality": 0.06538461538461539, "cost": 0.004132665, "time": 122.22055804729462}, "dd0d70fedd": {"quality": 0.1987179487179487, "cost": 0.0018010649999999997, "time": 72.4727225780487}, "dd76899626": {"quality": 0.0, "cost": 
0.01104418, "time": 163.46480329036712}, "dd9f5d1ba9": {"quality": 0.1987179487179487, "cost": 0.003202473, "time": 168.22212414741517}, "de1645053b": {"quality": 0.21752136752136753, "cost": 0.009485828, "time": 156.01319019794465}, "de18bf45e1": {"quality": 0.03333333333333333, "cost": 0.001299202, "time": 79.47014775276185}, "de1e56370f": {"quality": 0.2098290598290598, "cost": 0.0039003330000000006, "time": 95.9440367937088}, "deb84ddd06": {"quality": 0.0, "cost": 0.006051836, "time": 89.6067531824112}, "df2bb408cf": {"quality": 0.125, "cost": 0.0037215599999999996, "time": 133.46323487758636}, "df2ebe2c01": {"quality": 0.1987179487179487, "cost": 0.0045562350000000005, "time": 174.54163272380828}, "dfb8aebe38": {"quality": 0.21752136752136753, "cost": 0.008408229, "time": 166.86121113300322}, "dfce6153aa": {"quality": 0.08482905982905983, "cost": 0.00791217, "time": 165.6665771961212}, "dfda94bd2a": {"quality": 0.0, "cost": 0.0017028, "time": 85.28198800086975}, "dff452a9ca": {"quality": 0.0, "cost": 0.005131782, "time": 116.72450284957885}, "e02f982a26": {"quality": 0.2064102564102564, "cost": 0.00861959, "time": 133.95475313663482}, "e06701b665": {"quality": 0.07222222222222222, "cost": 0.005247648, "time": 155.51543612480162}, "e18abd2ab0": {"quality": 0.19252136752136753, "cost": 0.008510738, "time": 142.51286814212799}, "e1e596ee1b": {"quality": 0.06944444444444445, "cost": 0.009328152, "time": 170.3236501932144}, "e20ba014a1": {"quality": 0.25085470085470085, "cost": 0.010510256999999999, "time": 149.4374349117279}, "e223700849": {"quality": 0.10982905982905983, "cost": 0.002881368, "time": 120.50577642917634}, "e26c7bfbdb": {"quality": 0.08141025641025641, "cost": 0.007298063999999999, "time": 115.83573853969574}, "e35e5f81a7": {"quality": 0.0, "cost": 0.006075344999999999, "time": 117.27980465888976}, "e3cdc0d870": {"quality": 0.09444444444444444, "cost": 0.00339135, "time": 131.6535663843155}, "e3df4cf041": {"quality": 0.06538461538461539, "cost": 
0.007586271, "time": 106.25668971538545}, "e3eca9854c": {"quality": 0.10705128205128206, "cost": 0.004585331999999999, "time": 159.58829066753387}, "e47dc3abca": {"quality": 0.09252136752136753, "cost": 0.011292871999999999, "time": 107.18091866970062}, "e495ff601f": {"quality": 0.14807692307692308, "cost": 0.007825082, "time": 156.01348607540132}, "e4b9d4fb41": {"quality": 0.14038461538461539, "cost": 0.007481999, "time": 112.45064301490783}, "e510bda989": {"quality": 0.2098290598290598, "cost": 0.005625576, "time": 35.45525462627411}, "e515bc1935": {"quality": 0.2098290598290598, "cost": 0.00642271, "time": 124.40159273147583}, "e517cd2222": {"quality": 0.1737179487179487, "cost": 0.0015429100000000002, "time": 54.5986225605011}, "e51b01f418": {"quality": 0.1952991452991453, "cost": 0.005621012999999999, "time": 119.1760358095169}, "e520dfae5b": {"quality": 0.2098290598290598, "cost": 0.005809304999999999, "time": 154.96505286693574}, "e53906f84b": {"quality": 0.17649572649572648, "cost": 0.003217785, "time": 119.95617558956147}, "e53b349cce": {"quality": 0.04807692307692308, "cost": 0.006779802, "time": 116.54631357192993}, "e5401ed278": {"quality": 0.1987179487179487, "cost": 0.009414028, "time": 154.65711226463316}, "e56a16ca66": {"quality": 0.3237179487179487, "cost": 0.001822221, "time": 82.61048684120178}, "e5a2f72b30": {"quality": 0.2098290598290598, "cost": 0.008565615, "time": 130.87860839366914}, "e5a70a13ac": {"quality": 0.19252136752136753, "cost": 0.007232057000000002, "time": 115.96633384227752}, "e5fdeb4de9": {"quality": 0.1987179487179487, "cost": 0.0035100910000000003, "time": 106.90362923145295}, "e609601eee": {"quality": 0.13333333333333333, "cost": 0.001958406, "time": 89.7593854188919}, "e6f141cc8f": {"quality": 0.1737179487179487, "cost": 0.007898352, "time": 159.6583716392517}, "e7525c117a": {"quality": 0.125, "cost": 0.003349254, "time": 129.90280907154084}, "e7e94ab7a5": {"quality": 0.1, "cost": 0.001623004, "time": 95.54465639591217}, 
"e86bd256d6": {"quality": 0.12863247863247865, "cost": 0.003572466, "time": 121.87137434482574}, "e89283a4d9": {"quality": 0.17222222222222222, "cost": 0.0017132999999999998, "time": 125.78474328517913}, "e9befb80e0": {"quality": 0.10982905982905983, "cost": 0.0064075799999999995, "time": 125.61113278865814}, "ea38031fc1": {"quality": 0.08482905982905983, "cost": 0.007648266000000001, "time": 158.89819700717925}, "ea6ecc5653": {"quality": 0.0626068376068376, "cost": 0.007534413, "time": 126.29475154876708}, "ea8bcb3ae2": {"quality": 0.16944444444444445, "cost": 0.01714652, "time": 120.57036209106445}, "ea91a2e78b": {"quality": 0.15, "cost": 0.005214324, "time": 129.3872970342636}, "eac300f0d1": {"quality": 0.1737179487179487, "cost": 0.008836863, "time": 174.77539343833922}, "ebaa9b1297": {"quality": 0.2098290598290598, "cost": 0.001620009, "time": 84.55729002952575}, "ebbe8b6c4f": {"quality": 0.0, "cost": 0.006925624, "time": 89.16193358898164}, "ebdf3abff2": {"quality": 0.10982905982905983, "cost": 0.013354845, "time": 123.62594261169434}, "ebee1f2761": {"quality": 0.22863247863247865, "cost": 0.004503177, "time": 143.60831434726714}, "ec8844a5ae": {"quality": 0.025, "cost": 0.00061108, "time": 93.01526029109955}, "ecb5f78f37": {"quality": 0.0, "cost": 0.010665879, "time": 184.15539872646332}, "ece7ff5129": {"quality": 0.09871794871794873, "cost": 0.006345035999999999, "time": 107.36751945018767}, "ed60b8cac5": {"quality": 0.025, "cost": 0.005388696, "time": 182.1943165063858}, "ed6b5480a5": {"quality": 0.0702991452991453, "cost": 0.004838202, "time": 76.91644642353057}, "eda630dc85": {"quality": 0.21752136752136753, "cost": 0.0075326360000000005, "time": 115.44470648765564}, "edaaee5ed4": {"quality": 0.07307692307692308, "cost": 0.006511264000000001, "time": 83.00507278442383}, "edb2b764aa": {"quality": 0.08482905982905983, "cost": 0.013305772, "time": 130.48116376399994}, "edc52339db": {"quality": 0.04444444444444444, "cost": 0.012820474000000002, "time": 
177.77201774120329}, "ee46042c5d": {"quality": 0.14444444444444443, "cost": 0.011933044, "time": 173.3385812520981}, "ee855899d8": {"quality": 0.19444444444444442, "cost": 0.002078877, "time": 131.02240426540374}, "eef12d478b": {"quality": 0.06752136752136753, "cost": 0.005545968, "time": 137.4901770591736}, "ef37b3e0be": {"quality": 0.0, "cost": 0.008727486, "time": 182.45494146347045}, "ef43d497f1": {"quality": 0.13974358974358975, "cost": 0.012247042, "time": 182.13725912570953}, "ef4d4c4a62": {"quality": 0.3175213675213675, "cost": 0.008570482, "time": 138.68434212207794}, "f0655621af": {"quality": 0.05641025641025641, "cost": 0.004645079999999999, "time": 51.96998543739319}, "f076b4c9ae": {"quality": 0.1987179487179487, "cost": 0.008021229, "time": 145.75664427280424}, "f07881a734": {"quality": 0.0, "cost": 0.0008367299999999999, "time": 121.20072700977326}, "f0829510fc": {"quality": 0.12585470085470085, "cost": 0.007723235999999999, "time": 174.65302150249482}, "f11eddb4ed": {"quality": 0.22863247863247865, "cost": 0.011729255, "time": 164.10010514259338}, "f12622d3d7": {"quality": 0.0, "cost": 0.008399868000000001, "time": 176.69494199752808}, "f1408da253": {"quality": 0.2098290598290598, "cost": 0.009230984999999999, "time": 164.57058210372924}, "f1770e7d28": {"quality": 0.2098290598290598, "cost": 0.003513861, "time": 159.09384961128234}, "f18cf41929": {"quality": 0.0, "cost": 0.003205404, "time": 109.51144468784332}, "f1bda127f6": {"quality": 0.21752136752136753, "cost": 0.012127632, "time": 119.53270018100739}, "f2a2e91541": {"quality": 0.1814102564102564, "cost": 0.008741034, "time": 164.1866457939148}, "f2c04ed1c8": {"quality": 0.04038461538461539, "cost": 0.004816446, "time": 76.6830270767212}, "f2c

Download .txt

gitextract_v67onyjf/

├── .github/
│   └── workflows/
│       ├── ci.yaml
│       ├── docs.yaml
│       ├── package.yaml
│       └── test-docs.yaml
├── .gitignore
├── LICENSE
├── README.md
├── abacus-research/
│   ├── README.md
│   ├── README_CUAD_LOCAL.md
│   ├── biodex-ablation.py
│   ├── biodex-demo.py
│   ├── biodex-max-quality-at-cost.py
│   ├── biodex-min-at-fixed-quality.py
│   ├── biodex-pareto-cascades.py
│   ├── biodex-priors-cascades.json
│   ├── biodex-priors.json
│   ├── biodex-revision-priors-maxquality.json
│   ├── biodex-revision-priors-mincost.json
│   ├── cheap-priors-cascades.json
│   ├── cheap-priors.json
│   ├── cuad-demo.py
│   ├── cuad-max-quality-at-cost.py
│   ├── cuad-priors.json
│   ├── cuad_data_loader.py
│   ├── download_embeddings_and_mmqa.sh
│   ├── helper-scripts/
│   │   ├── biodex-gen-index.py
│   │   ├── generate-prior-stats-biodex-first-convert.py
│   │   ├── generate-prior-stats-biodex.py
│   │   ├── generate-prior-stats-cuad.py
│   │   ├── mmqa-baseline.py
│   │   ├── mmqa-gen-image-index.py
│   │   ├── mmqa-gen-image-title-index.py
│   │   ├── mmqa-gen-table-index.py
│   │   └── mmqa-gen-text-index.py
│   ├── mmqa-complex-demo.py
│   ├── mmqa-demo.py
│   ├── run_ablation_study.sh
│   ├── run_biodex.sh
│   ├── run_biodex_cascades.sh
│   ├── run_biodex_cost_threshold.sh
│   ├── run_biodex_min_cost_latency.sh
│   ├── run_biodex_priors.sh
│   ├── run_biodex_priors_constrained.sh
│   ├── run_cuad.sh
│   ├── run_cuad_cost_threshold.sh
│   ├── run_cuad_min_cost_latency.sh
│   ├── run_cuad_priors.sh
│   ├── run_cuad_priors_constrained.sh
│   ├── run_mmqa.sh
│   ├── run_mmqa_complex.sh
│   ├── run_mmqa_complex_min_cost_latency.sh
│   ├── run_mmqa_min_cost_latency.sh
│   ├── score_biodex.py
│   ├── score_cuad.py
│   ├── score_mmqa.py
│   ├── score_mmqa_complex.py
│   └── setup_cuad_data.py
├── demos/
│   ├── audio-demo.py
│   ├── caching-demo.py
│   ├── demo_core.py
│   ├── enron-demo.py
│   ├── image-demo.py
│   ├── join-data/
│   │   └── animal-texts/
│   │       ├── animal1.txt
│   │       ├── animal2.txt
│   │       ├── animal3.txt
│   │       ├── animal4.txt
│   │       ├── animal5.txt
│   │       └── animal6.txt
│   ├── join-demo.py
│   ├── paper-demo.py
│   ├── real-estate-demo.py
│   ├── simple-demo.py
│   └── vllm-demo.py
├── evals/
│   └── quest/
│       └── eval.py
├── pyproject.toml
├── quickstart.ipynb
├── ruff.toml
├── scripts/
│   ├── capture_litellm_stats.py
│   ├── capture_provider_stats.py
│   ├── generate_test_messages.py
│   └── update_model_info.py
├── src/
│   └── palimpzest/
│       ├── __init__.py
│       ├── agents/
│       │   ├── __init__.py
│       │   ├── compute_agents.py
│       │   └── search_agents.py
│       ├── constants.py
│       ├── core/
│       │   ├── __init__.py
│       │   ├── data/
│       │   │   ├── __init__.py
│       │   │   ├── context.py
│       │   │   ├── context_manager.py
│       │   │   ├── dataset.py
│       │   │   ├── index_dataset.py
│       │   │   └── iter_dataset.py
│       │   ├── elements/
│       │   │   ├── __init__.py
│       │   │   ├── filters.py
│       │   │   ├── groupbysig.py
│       │   │   └── records.py
│       │   ├── lib/
│       │   │   ├── __init__.py
│       │   │   └── schemas.py
│       │   └── models.py
│       ├── policy.py
│       ├── prompts/
│       │   ├── __init__.py
│       │   ├── agent_prompts.py
│       │   ├── aggregate_prompts.py
│       │   ├── context_search.py
│       │   ├── convert_prompts.py
│       │   ├── critique_and_refine_prompts.py
│       │   ├── filter_prompts.py
│       │   ├── join_prompts.py
│       │   ├── moa_aggregator_prompts.py
│       │   ├── moa_proposer_prompts.py
│       │   ├── prompt_factory.py
│       │   ├── prompt_manager.py
│       │   ├── split_merge_prompts.py
│       │   ├── split_proposer_prompts.py
│       │   ├── utils.py
│       │   └── validator.py
│       ├── query/
│       │   ├── __init__.py
│       │   ├── execution/
│       │   │   ├── __init__.py
│       │   │   ├── all_sample_execution_strategy.py
│       │   │   ├── execution_strategy.py
│       │   │   ├── execution_strategy_type.py
│       │   │   ├── mab_execution_strategy.py
│       │   │   ├── parallel_execution_strategy.py
│       │   │   └── single_threaded_execution_strategy.py
│       │   ├── generators/
│       │   │   ├── __init__.py
│       │   │   ├── gemini_client.py
│       │   │   └── generators.py
│       │   ├── operators/
│       │   │   ├── __init__.py
│       │   │   ├── aggregate.py
│       │   │   ├── compute.py
│       │   │   ├── convert.py
│       │   │   ├── critique_and_refine.py
│       │   │   ├── distinct.py
│       │   │   ├── filter.py
│       │   │   ├── join.py
│       │   │   ├── limit.py
│       │   │   ├── logical.py
│       │   │   ├── mixture_of_agents.py
│       │   │   ├── physical.py
│       │   │   ├── project.py
│       │   │   ├── rag.py
│       │   │   ├── scan.py
│       │   │   ├── search.py
│       │   │   ├── split.py
│       │   │   └── topk.py
│       │   ├── optimizer/
│       │   │   ├── __init__.py
│       │   │   ├── cost_model.py
│       │   │   ├── optimizer.py
│       │   │   ├── optimizer_strategy.py
│       │   │   ├── optimizer_strategy_type.py
│       │   │   ├── plan.py
│       │   │   ├── primitives.py
│       │   │   ├── rules.py
│       │   │   └── tasks.py
│       │   └── processor/
│       │       ├── __init__.py
│       │       ├── config.py
│       │       ├── query_processor.py
│       │       └── query_processor_factory.py
│       ├── schemabuilder/
│       │   ├── __init__.py
│       │   └── schema_builder.py
│       ├── tools/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── allenpdf.py
│       │   ├── pdfparser.py
│       │   └── skema_tools.py
│       ├── utils/
│       │   ├── __init__.py
│       │   ├── env_helpers.py
│       │   ├── hash_helpers.py
│       │   ├── model_helpers.py
│       │   ├── model_info_helpers.py
│       │   ├── progress.py
│       │   ├── pz_models_information.json
│       │   └── udfs.py
│       └── validator/
│           ├── __init__.py
│           └── validator.py
├── testdata/
│   ├── README.md
│   ├── download-testdata.sh
│   ├── enron-eval-medium-labels.json
│   └── target_matching.csv
├── tests/
│   └── pytest/
│       ├── README.md
│       ├── conftest.py
│       ├── data/
│       │   ├── email_schema.json
│       │   ├── email_schema.yml
│       │   ├── synapse_schema.csv
│       │   └── synapse_schema.jsonld
│       ├── fixtures/
│       │   ├── champion_outputs.py
│       │   ├── datasets.py
│       │   ├── execution_data.py
│       │   ├── expected_physical_plans.py
│       │   ├── expected_qualities.py
│       │   ├── expected_records.py
│       │   ├── models.py
│       │   ├── operator_to_stats.py
│       │   ├── physical_plans.py
│       │   ├── schemas.py
│       │   ├── side_effects.py
│       │   └── workloads.py
│       ├── test_aggregate.py
│       ├── test_convert.py
│       ├── test_dataset.py
│       ├── test_distinct.py
│       ├── test_dynamic_models.py
│       ├── test_dynamicschema.py
│       ├── test_execution.py
│       ├── test_filter.py
│       ├── test_generator.py
│       ├── test_iter_dataset.py
│       ├── test_join.py
│       ├── test_map.py
│       ├── test_optimizer.py
│       ├── test_physical.py
│       ├── test_records.py
│       ├── test_rules.py
│       ├── test_scan.py
│       └── test_schemas.py
└── website/
    ├── .gitignore
    ├── README.md
    ├── blog/
    │   ├── 2024-06-01-palimpzest/
    │   │   ├── bibtex.js
    │   │   └── index.md
    │   ├── authors.yml
    │   └── tags.yml
    ├── docs/
    │   ├── api/
    │   │   └── overview.mdx
    │   ├── getting-started/
    │   │   ├── installation.mdx
    │   │   ├── next-steps.mdx
    │   │   └── quickstart.mdx
    │   ├── intro.mdx
    │   └── user-guide/
    │       ├── dataset.mdx
    │       ├── operators/
    │       │   ├── overview.mdx
    │       │   ├── relational.mdx
    │       │   ├── sem_agg.mdx
    │       │   ├── sem_filter.mdx
    │       │   ├── sem_join.mdx
    │       │   ├── sem_map.mdx
    │       │   └── sem_topk.mdx
    │       ├── optimization.mdx
    │       └── overview.mdx
    ├── docusaurus.config.ts
    ├── package.json
    ├── sidebars.ts
    ├── src/
    │   ├── components/
    │   │   ├── HomepageFeatures/
    │   │   │   ├── index.tsx
    │   │   │   └── styles.module.css
    │   │   └── ResearchPage/
    │   │       └── admonitions.tsx
    │   ├── css/
    │   │   └── custom.css
    │   └── pages/
    │       ├── index.module.css
    │       ├── index.tsx
    │       ├── palimpchat.mdx
    │       └── research.mdx
    ├── static/
    │   └── .nojekyll
    └── tsconfig.json

Download .txt

SYMBOL INDEX (1755 symbols across 130 files)

FILE: abacus-research/biodex-ablation.py
  class BiodexValidator (line 32) | class BiodexValidator(pz.Validator):
    method __init__ (line 33) | def __init__(
    method _compute_pmid_to_label (line 55) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 68) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 93) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 123) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 136) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 146) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 147) | def __init__(
    method __len__ (line 169) | def __len__(self):
    method __getitem__ (line 172) | def __getitem__(self, idx: int):
  function search_func (line 291) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 393) | def compute_target_record(entry):
  function rank_precision_at_k (line 405) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 422) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-demo.py
  class BiodexValidator (line 32) | class BiodexValidator(pz.Validator):
    method __init__ (line 33) | def __init__(
    method _compute_pmid_to_label (line 55) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 68) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 93) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 123) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 136) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 146) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 147) | def __init__(
    method __len__ (line 169) | def __len__(self):
    method __getitem__ (line 172) | def __getitem__(self, idx: int):
  function search_func (line 332) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 435) | def compute_target_record(entry):
  function rank_precision_at_k (line 447) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 464) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-max-quality-at-cost.py
  class BiodexValidator (line 34) | class BiodexValidator(pz.Validator):
    method __init__ (line 35) | def __init__(
    method _compute_pmid_to_label (line 57) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 70) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 95) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 125) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 138) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 148) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 149) | def __init__(
    method __len__ (line 171) | def __len__(self):
    method __getitem__ (line 174) | def __getitem__(self, idx: int):
  function search_func (line 321) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 423) | def compute_target_record(entry):
  function rank_precision_at_k (line 435) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 452) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-min-at-fixed-quality.py
  class BiodexValidator (line 33) | class BiodexValidator(pz.Validator):
    method __init__ (line 34) | def __init__(
    method _compute_pmid_to_label (line 56) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 69) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 94) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 124) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 137) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 147) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 148) | def __init__(
    method __len__ (line 170) | def __len__(self):
    method __getitem__ (line 173) | def __getitem__(self, idx: int):
  function search_func (line 318) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 415) | def compute_target_record(entry):
  function rank_precision_at_k (line 427) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 444) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/biodex-pareto-cascades.py
  class BiodexValidator (line 33) | class BiodexValidator(pz.Validator):
    method __init__ (line 34) | def __init__(
    method _compute_pmid_to_label (line 56) | def _compute_pmid_to_label(self, dataset: list[dict]) -> dict:
    method rank_precision_at_k (line 69) | def rank_precision_at_k(self, preds: list | None, targets: list):
    method term_recall (line 94) | def term_recall(self, preds: list | None, targets: list):
    method map_score_fn (line 124) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 137) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class BiodexDataset (line 147) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 148) | def __init__(
    method __len__ (line 170) | def __len__(self):
    method __getitem__ (line 173) | def __getitem__(self, idx: int):
  function search_func (line 321) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...
  function compute_target_record (line 420) | def compute_target_record(entry):
  function rank_precision_at_k (line 432) | def rank_precision_at_k(preds: list, targets: list, k: int):
  function compute_avg_rp_at_k (line 449) | def compute_avg_rp_at_k(records, k=5):

FILE: abacus-research/cuad-demo.py
  function get_label_df (line 267) | def get_label_df(num_contracts: int = 1, seed: int=42) -> pd.DataFrame:
  function get_jaccard (line 325) | def get_jaccard(label, pred):
  function evaluate_entry (line 347) | def evaluate_entry(labels, preds, substr_ok):
  function handle_empty_preds (line 401) | def handle_empty_preds(preds):
  class CUADValidator (line 413) | class CUADValidator(pz.Validator):
    method __init__ (line 414) | def __init__(self, num_contracts: int = 1, seed: int=42):
    method map_score_fn (line 425) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method _compute_contract_id_to_labels (line 440) | def _compute_contract_id_to_labels(self):
  class CUADDataset (line 493) | class CUADDataset(pz.IterDataset):
    method __init__ (line 494) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:...
    method _construct_dataset (line 511) | def _construct_dataset(self, dataset, num_contracts, seed: int=42):
    method __len__ (line 544) | def __len__(self):
    method __getitem__ (line 547) | def __getitem__(self, idx: int):
  function compute_precision_recall (line 553) | def compute_precision_recall(label_df, preds_df):
  function parse_arguments (line 589) | def parse_arguments():
  function build_cuad_query (line 665) | def build_cuad_query(dataset, mode):
  function main (line 691) | def main():

FILE: abacus-research/cuad-max-quality-at-cost.py
  function get_label_df (line 268) | def get_label_df(num_contracts: int = 1, seed: int=42) -> pd.DataFrame:
  function get_jaccard (line 325) | def get_jaccard(label, pred):
  function evaluate_entry (line 347) | def evaluate_entry(labels, preds, substr_ok):
  function handle_empty_preds (line 401) | def handle_empty_preds(preds):
  class CUADValidator (line 413) | class CUADValidator(pz.Validator):
    method __init__ (line 414) | def __init__(self, num_contracts: int = 1, seed: int=42):
    method map_score_fn (line 425) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method _compute_contract_id_to_labels (line 439) | def _compute_contract_id_to_labels(self):
  class CUADDataset (line 492) | class CUADDataset(pz.IterDataset):
    method __init__ (line 493) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:...
    method _construct_dataset (line 509) | def _construct_dataset(self, dataset, num_contracts, seed: int=42):
    method __len__ (line 542) | def __len__(self):
    method __getitem__ (line 545) | def __getitem__(self, idx: int):
  function compute_precision_recall (line 551) | def compute_precision_recall(label_df, preds_df):
  function parse_arguments (line 587) | def parse_arguments():
  function build_cuad_query (line 650) | def build_cuad_query(dataset, mode):
  function main (line 677) | def main():

FILE: abacus-research/cuad_data_loader.py
  function load_cuad_data (line 14) | def load_cuad_data(split="test", data_dir=None):
  function get_unique_contracts (line 60) | def get_unique_contracts(dataset):
  function filter_by_contracts (line 69) | def filter_by_contracts(dataset, contract_titles):
  function sample_contracts (line 74) | def sample_contracts(dataset, num_contracts, seed=42):

FILE: abacus-research/helper-scripts/generate-prior-stats-biodex-first-convert.py
  class BiodexDataset (line 26) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 27) | def __init__(
    method compute_label (line 49) | def compute_label(self, entry: dict) -> dict:
    method term_recall (line 59) | def term_recall(preds: list | None, targets: list):
    method __len__ (line 89) | def __len__(self):
    method __getitem__ (line 92) | def __getitem__(self, idx: int):

FILE: abacus-research/helper-scripts/generate-prior-stats-biodex.py
  class BiodexDataset (line 35) | class BiodexDataset(pz.IterDataset):
    method __init__ (line 36) | def __init__(
    method compute_label (line 58) | def compute_label(self, entry: dict) -> dict:
    method rank_precision_at_k (line 71) | def rank_precision_at_k(preds: list | None, targets: list, k: int):
    method term_recall (line 97) | def term_recall(preds: list | None, targets: list):
    method __len__ (line 127) | def __len__(self):
    method __getitem__ (line 130) | def __getitem__(self, idx: int):
  function search_func (line 202) | def search_func(index: chromadb.Collection, query: list[list[float]], k:...

FILE: abacus-research/helper-scripts/generate-prior-stats-cuad.py
  function get_jaccard (line 273) | def get_jaccard(label, pred):
  function evaluate_entry (line 295) | def evaluate_entry(labels, preds, substr_ok):
  function handle_empty_preds (line 350) | def handle_empty_preds(preds):
  function compute_precision_recall (line 365) | def compute_precision_recall(label_df, preds_df):
  class CUADDataset (line 400) | class CUADDataset(pz.IterDataset):
    method __init__ (line 401) | def __init__(self, num_contracts: int = 1, split: str = "train", seed:...
    method _construct_dataset (line 419) | def _construct_dataset(self, dataset, num_contracts, seed: int=42, inc...
    method __len__ (line 484) | def __len__(self):
    method __getitem__ (line 487) | def __getitem__(self, idx: int):
    method get_label_df (line 490) | def get_label_df(self):
  function parse_arguments (line 505) | def parse_arguments():
  function build_cuad_query (line 513) | def build_cuad_query(dataset, mode):
  function main (line 540) | def main():

FILE: abacus-research/helper-scripts/mmqa-baseline.py
  function f1 (line 14) | def f1(preds: list | None, targets: list):

FILE: abacus-research/mmqa-complex-demo.py
  function get_json_from_answer (line 57) | def get_json_from_answer(answer: str):
  class MMQAValidator (line 93) | class MMQAValidator(pz.Validator):
    method __init__ (line 94) | def __init__(self, dataset: list[dict]):
    method _compute_qid_to_labels (line 101) | def _compute_qid_to_labels(self) -> dict:
    method recall (line 121) | def recall(self, preds: list | None, targets: list):
    method f1 (line 151) | def f1(self, preds: list | None, targets: list):
    method map_score_fn (line 189) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method join_score_fn (line 196) | def join_score_fn(self, condition: str, left_input_record: dict, right...
  class MMQAQuestionDataset (line 213) | class MMQAQuestionDataset(pz.IterDataset):
    method __init__ (line 214) | def __init__(self, dataset: list[dict]):
    method __len__ (line 218) | def __len__(self):
    method __getitem__ (line 221) | def __getitem__(self, idx: int):
  class MMQATextDataset (line 225) | class MMQATextDataset(pz.IterDataset):
    method __init__ (line 226) | def __init__(self, dataset: list[dict]):
    method __len__ (line 245) | def __len__(self):
    method __getitem__ (line 248) | def __getitem__(self, idx: int):
  class MMQATableDataset (line 252) | class MMQATableDataset(pz.IterDataset):
    method __init__ (line 253) | def __init__(self, dataset: list[dict]):
    method __len__ (line 296) | def __len__(self):
    method __getitem__ (line 299) | def __getitem__(self, idx: int):
  class MMQAImageDataset (line 303) | class MMQAImageDataset(pz.IterDataset):
    method __init__ (line 304) | def __init__(self, dataset: list[dict]):
    method __len__ (line 341) | def __len__(self):
    method __getitem__ (line 344) | def __getitem__(self, idx: int):
  function get_dataset (line 348) | def get_dataset(split: str, shuffle: bool, seed: int, num_samples: int |...
  function compute_f1 (line 364) | def compute_f1(final_df, answers_df):

FILE: abacus-research/mmqa-demo.py
  function get_json_from_answer (line 46) | def get_json_from_answer(answer: str):
  class MMQAValidator (line 82) | class MMQAValidator(pz.Validator):
    method __init__ (line 83) | def __init__(
    method _compute_qid_to_labels (line 113) | def _compute_qid_to_labels(self) -> dict:
    method recall (line 139) | def recall(self, preds: list | None, targets: list):
    method f1 (line 172) | def f1(self, preds: list | None, targets: list):
    method map_score_fn (line 213) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method topk_score_fn (line 218) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
  class MMQADataset (line 235) | class MMQADataset(pz.IterDataset):
    method __init__ (line 236) | def __init__(
    method __len__ (line 265) | def __len__(self):
    method __getitem__ (line 268) | def __getitem__(self, idx: int):
  function compute_f1 (line 282) | def compute_f1(final_df, answers_df):
  function get_results_and_ids (line 456) | def get_results_and_ids(index: chromadb.Collection, query: list[list[flo...
  function text_search_func (line 482) | def text_search_func(index: chromadb.Collection, query: list[list[float]...
  function table_search_func (line 487) | def table_search_func(index: chromadb.Collection, query: list[list[float...
  function image_search_func (line 492) | def image_search_func(index: chromadb.Collection, query: list[list[float...

FILE: abacus-research/score_biodex.py
  function compute_final_metrics (line 6) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/score_cuad.py
  function compute_final_metrics (line 7) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/score_mmqa.py
  function compute_final_metrics (line 6) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/score_mmqa_complex.py
  function compute_final_metrics (line 7) | def compute_final_metrics(metric: str, dir: str, exp_base_name: str):

FILE: abacus-research/setup_cuad_data.py
  function setup_cuad_data (line 12) | def setup_cuad_data():

FILE: demos/audio-demo.py
  class SmallAudioDataset (line 8) | class SmallAudioDataset(pz.AudioFileDataset):
    method __init__ (line 9) | def __init__(self, *args, **kwargs):

FILE: demos/caching-demo.py
  class TravelRequestDataset (line 169) | class TravelRequestDataset(pz.IterDataset):
    method __init__ (line 172) | def __init__(self, requests: List[str]):
    method __len__ (line 176) | def __len__(self):
    method __getitem__ (line 179) | def __getitem__(self, idx: int):
  function get_model_from_string (line 198) | def get_model_from_string(model_str: str) -> Model:
  function print_cache_stats (line 207) | def print_cache_stats(execution_stats):
  function main (line 247) | def main():

FILE: demos/demo_core.py
  function build_sci_paper_plan (line 30) | def build_sci_paper_plan(dataset):
  function build_test_pdf_plan (line 34) | def build_test_pdf_plan(dataset):
  function build_mit_battery_paper_plan (line 38) | def build_mit_battery_paper_plan(dataset):
  function build_enron_plan (line 45) | def build_enron_plan(dataset):
  function compute_enron_stats (line 49) | def compute_enron_stats(dataset):
  function enron_gby_plan (line 55) | def enron_gby_plan(dataset):
  function enron_count_plan (line 65) | def enron_count_plan(dataset):
  function enron_average_count_plan (line 75) | def enron_average_count_plan(dataset):
  function enron_limit_plan (line 90) | def enron_limit_plan(dataset, limit=5):
  function build_image_plan (line 96) | def build_image_plan(dataset):
  function build_image_agg_plan (line 103) | def build_image_agg_plan(dataset):
  function build_join_plan (line 115) | def build_join_plan(dataset1, dataset2):
  function build_join_image_plan (line 122) | def build_join_image_plan(dataset1, dataset2):
  function get_task_config (line 129) | def get_task_config(task, dataset, join_dataset=None):
  function execute_task (line 188) | def execute_task(task, dataset, policy, join_dataset=None, verbose=False...
  function format_results_table (line 206) | def format_results_table(records: list[DataRecord], cols=None):

FILE: demos/enron-demo.py
  class EnronValidator (line 8) | class EnronValidator(pz.Validator):
    method __init__ (line 9) | def __init__(self, labels_file: str):
    method map_score_fn (line 17) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
  class EnronDataset (line 27) | class EnronDataset(pz.IterDataset):
    method __init__ (line 28) | def __init__(self, dir: str, labels_file: str | None = None, split: st...
    method __len__ (line 37) | def __len__(self):
    method __getitem__ (line 40) | def __getitem__(self, idx: int):

FILE: demos/image-demo.py
  function build_image_plan (line 22) | def build_image_plan(dataset):

FILE: demos/join-demo.py
  function run_text_join (line 16) | def run_text_join():
  function run_image_join (line 30) | def run_image_join():
  function run_text_image_join (line 44) | def run_text_image_join():

FILE: demos/paper-demo.py
  function print_table (line 15) | def print_table(records, cols=None, plan_str=None):
  function within_two_miles_of_mit (line 50) | def within_two_miles_of_mit(record: dict):
  function in_price_range (line 59) | def in_price_range(record: dict):
  class RealEstateListingDataset (line 118) | class RealEstateListingDataset(pz.IterDataset):
    method __init__ (line 119) | def __init__(self, listings_dir):
    method __len__ (line 125) | def __len__(self):
    method __getitem__ (line 128) | def __getitem__(self, idx: int):

FILE: demos/real-estate-demo.py
  function print_table (line 13) | def print_table(records, cols=None, plan_str=None):
  function within_two_miles_of_mit (line 48) | def within_two_miles_of_mit(record: dict):
  function in_price_range (line 57) | def in_price_range(record: dict):
  class RealEstateListingDataset (line 113) | class RealEstateListingDataset(pz.IterDataset):
    method __init__ (line 114) | def __init__(self, listings_dir):
    method __len__ (line 120) | def __len__(self):
    method __getitem__ (line 123) | def __getitem__(self, idx: int):

FILE: demos/simple-demo.py
  function main (line 13) | def main():

FILE: demos/vllm-demo.py
  class SentimentResult (line 21) | class SentimentResult(BaseModel):
  function main (line 25) | def main():

FILE: evals/quest/eval.py
  function prepare_docs_for_query (line 11) | def prepare_docs_for_query(items: list, gt_docs: list) -> list:
  function palimpzest_run_query (line 22) | def palimpzest_run_query(query: dict, documents: list) -> list[str]:
  function main (line 56) | def main():

FILE: scripts/capture_litellm_stats.py
  class RawProviderStatsCapture (line 48) | class RawProviderStatsCapture(CustomLogger):
    method __init__ (line 57) | def __init__(self):
    method log_success_event (line 62) | def log_success_event(self, kwargs, response_obj, start_time, end_time):
    method log_failure_event (line 94) | def log_failure_event(self, kwargs, response_obj, start_time, end_time):
    method reset (line 100) | def reset(self):
    method get_captured_data (line 106) | def get_captured_data(self) -> dict[str, Any]:
  function load_messages (line 179) | def load_messages(modality: str, provider: str, messages_dir: str) -> li...
  function transform_messages_for_litellm (line 186) | def transform_messages_for_litellm(messages: list[dict]) -> list[dict]:
  function call_litellm_api (line 267) | def call_litellm_api(
  function capture_stats_for_provider (line 382) | def capture_stats_for_provider(
  function save_stats (line 425) | def save_stats(stats: dict[str, Any], output_dir: str, provider: str, mo...
  function main (line 436) | def main():

FILE: scripts/capture_provider_stats.py
  function detect_image_media_type (line 35) | def detect_image_media_type(base64_data: str) -> str:
  function load_messages (line 119) | def load_messages(modality: str, provider: str, messages_dir: str) -> li...
  function transform_messages_for_openai (line 126) | def transform_messages_for_openai(messages: list[dict]) -> list[dict]:
  function transform_messages_for_anthropic (line 228) | def transform_messages_for_anthropic(messages: list[dict]) -> tuple[str ...
  function transform_messages_for_gemini (line 311) | def transform_messages_for_gemini(messages: list[dict]) -> tuple[str | N...
  function call_openai_api (line 386) | def call_openai_api(messages: list[dict], model: str, cache_key: str | N...
  function call_azure_api (line 446) | def call_azure_api(messages: list[dict], model: str, cache_key: str | No...
  function call_anthropic_api (line 516) | def call_anthropic_api(messages: list[dict], model: str) -> dict[str, Any]:
  function call_gemini_api (line 565) | def call_gemini_api(messages: list[dict], model: str, use_vertex: bool =...
  function capture_stats_for_provider (line 668) | def capture_stats_for_provider(
  function save_stats (line 726) | def save_stats(stats: dict[str, Any], output_dir: str, provider: str, mo...
  function main (line 737) | def main():

FILE: scripts/generate_test_messages.py
  function generate_session_id (line 36) | def generate_session_id(provider: str, modality: str) -> str:
  class TextInputSchema (line 129) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 135) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 141) | class AudioInputSchema(BaseModel):
  class OutputSchema (line 154) | class OutputSchema(BaseModel):
  function save_messages (line 255) | def save_messages(modality: str, provider: str, messages: list[dict], ou...
  function main (line 283) | def main():

FILE: scripts/update_model_info.py
  function get_free_port (line 115) | def get_free_port() -> int:
  function extract_provider (line 121) | def extract_provider(model_id: str) -> str:
  function get_api_key_env_var (line 156) | def get_api_key_env_var(provider: str) -> str | None:
  function generate_config_yaml (line 160) | def generate_config_yaml(model_ids: list[str]) -> str:
  function fetch_dynamic_model_info (line 188) | def fetch_dynamic_model_info(model_ids: list[str]) -> dict[str, Any]:
  function fetch_litellm_data (line 262) | def fetch_litellm_data() -> dict[str, Any]:
  function load_existing_data (line 275) | def load_existing_data() -> dict[str, Any]:
  function save_data (line 282) | def save_data(data: dict[str, Any]) -> None:
  function derive_model_flags_with_provider (line 295) | def derive_model_flags_with_provider(model_id: str, provider: str) -> di...
  function prompt_for_value (line 307) | def prompt_for_value(field_name: str, current_value: Any, value_type: st...
  function review_field (line 331) | def review_field(
  function convert_and_review_model (line 368) | def convert_and_review_model(
  function update_model (line 539) | def update_model(
  function process_models (line 572) | def process_models(
  function main (line 625) | def main():

FILE: src/palimpzest/agents/search_agents.py
  class PZBaseAgent (line 71) | class PZBaseAgent(CodeAgent):
    method __init__ (line 72) | def __init__(self, run_id: str, context_description: str, *args, **kwa...
    method write_memory_to_messages (line 87) | def write_memory_to_messages(
    method _generate_planning_step (line 101) | def _generate_planning_step(
    method _step_stream (line 243) | def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessa...
    method _run_stream (line 378) | def _run_stream(
    method run (line 445) | def run(
  class PZBaseManagedAgent (line 545) | class PZBaseManagedAgent(PZBaseAgent):
    method __call__ (line 547) | def __call__(self, task: str, **kwargs):
  class DataDiscoveryAgent (line 569) | class DataDiscoveryAgent(PZBaseManagedAgent):
    method __init__ (line 570) | def __init__(self, run_id: str, context_description: str, *args, **kwa...
  class SearchManagerAgent (line 604) | class SearchManagerAgent(PZBaseAgent):
    method __init__ (line 605) | def __init__(self, run_id: str, context_description: str, *args, **kwa...

FILE: src/palimpzest/constants.py
  class PromptStrategy (line 12) | class PromptStrategy(str, Enum):
    method is_agg_prompt (line 46) | def is_agg_prompt(self):
    method is_filter_prompt (line 49) | def is_filter_prompt(self):
    method is_join_prompt (line 52) | def is_join_prompt(self):
    method is_map_prompt (line 55) | def is_map_prompt(self):
    method is_critic_prompt (line 58) | def is_critic_prompt(self):
    method is_refine_prompt (line 61) | def is_refine_prompt(self):
    method is_moa_proposer_prompt (line 64) | def is_moa_proposer_prompt(self):
    method is_moa_aggregator_prompt (line 67) | def is_moa_aggregator_prompt(self):
    method is_split_proposer_prompt (line 70) | def is_split_proposer_prompt(self):
    method is_split_merger_prompt (line 73) | def is_split_merger_prompt(self):
    method is_no_reasoning_prompt (line 76) | def is_no_reasoning_prompt(self):
  class Modality (line 80) | class Modality(str, Enum):
  class AggFunc (line 86) | class AggFunc(str, Enum):
  class Cardinality (line 93) | class Cardinality(str, Enum):
    method _missing_ (line 98) | def _missing_(cls, value):
  class PickOutputStrategy (line 108) | class PickOutputStrategy(str, Enum):
  function log_attempt_number (line 135) | def log_attempt_number(retry_state):
  class Model (line 191) | class Model:
    method __init__ (line 199) | def __init__(self, model_id: str, api_base: str | None = None, **vllm_...
    method _get_litellm_model_specs (line 215) | def _get_litellm_model_specs(self, model_id: str) -> dict:
    method __lt__ (line 257) | def __lt__(self, other):
    method get_all_models (line 265) | def get_all_models(cls) -> list[Model]:
    method value (line 269) | def value(self) -> str:
    method provider (line 273) | def provider(self) -> str | None:
    method api_key_env_var (line 278) | def api_key_env_var(self) -> str | None:
    method __repr__ (line 295) | def __repr__(self) -> str:
    method __str__ (line 298) | def __str__(self) -> str:
    method __eq__ (line 301) | def __eq__(self, other: object) -> bool:
    method __hash__ (line 308) | def __hash__(self) -> int:
    method is_llama_model (line 311) | def is_llama_model(self) -> bool:
    method is_vllm_model (line 314) | def is_vllm_model(self) -> bool:
    method is_embedding_model (line 317) | def is_embedding_model(self) -> bool:
    method is_text_image_multimodal_embedding_model (line 320) | def is_text_image_multimodal_embedding_model(self) -> bool:
    method is_provider_vertex_ai (line 323) | def is_provider_vertex_ai(self) -> bool:
    method is_provider_anthropic (line 326) | def is_provider_anthropic(self) -> bool:
    method is_provider_google_ai_studio (line 329) | def is_provider_google_ai_studio(self) -> bool:
    method is_provider_openai (line 332) | def is_provider_openai(self) -> bool:
    method is_provider_azure (line 335) | def is_provider_azure(self) -> bool:
    method is_provider_together_ai (line 338) | def is_provider_together_ai(self) -> bool:
    method is_provider_deepseek (line 341) | def is_provider_deepseek(self) -> bool:
    method is_provider_ollama (line 344) | def is_provider_ollama(self) -> bool:
    method is_model_gemini (line 347) | def is_model_gemini(self) -> bool:
    method get_model_name (line 350) | def get_model_name(self) -> str:
    method is_o_model (line 353) | def is_o_model(self) -> bool:
    method is_gpt_5_model (line 356) | def is_gpt_5_model(self) -> bool:
    method is_reasoning_model (line 359) | def is_reasoning_model(self) -> bool:
    method is_text_model (line 362) | def is_text_model(self) -> bool:
    method is_vision_model (line 365) | def is_vision_model(self) -> bool:
    method is_audio_model (line 368) | def is_audio_model(self) -> bool:
    method is_text_image_multimodal_model (line 371) | def is_text_image_multimodal_model(self) -> bool:
    method is_text_audio_multimodal_model (line 374) | def is_text_audio_multimodal_model(self) -> bool:
    method supports_prompt_caching (line 377) | def supports_prompt_caching(self) -> bool:
    method get_usd_per_input_token (line 381) | def get_usd_per_input_token(self) -> float:
    method get_usd_per_audio_input_token (line 384) | def get_usd_per_audio_input_token(self) -> float:
    method get_usd_per_image_input_token (line 388) | def get_usd_per_image_input_token(self) -> float:
    method get_usd_per_cache_read_token (line 391) | def get_usd_per_cache_read_token(self) -> float:
    method get_usd_per_audio_cache_read_token (line 394) | def get_usd_per_audio_cache_read_token(self) -> float:
    method get_usd_per_image_cache_read_token (line 397) | def get_usd_per_image_cache_read_token(self) -> float:
    method get_usd_per_cached_token_per_hour (line 401) | def get_usd_per_cached_token_per_hour(self) -> float:
    method get_usd_per_cache_creation_token (line 404) | def get_usd_per_cache_creation_token(self) -> float:
    method get_usd_per_output_token (line 407) | def get_usd_per_output_token(self) -> float:
    method get_usd_per_audio_cache_creation_token (line 411) | def get_usd_per_audio_cache_creation_token(self) -> float:
    method get_usd_per_image_cache_creation_token (line 415) | def get_usd_per_image_cache_creation_token(self) -> float:
    method get_seconds_per_output_token (line 418) | def get_seconds_per_output_token(self) -> float:
    method get_overall_score (line 421) | def get_overall_score(self) -> float:

FILE: src/palimpzest/core/data/context.py
  class Context (line 120) | class Context(Dataset, ABC):
    method __init__ (line 135) | def __init__(
    method description (line 175) | def description(self) -> str:
    method materialized (line 180) | def materialized(self) -> bool:
    method tools (line 185) | def tools(self) -> list[Callable]:
    method __str__ (line 189) | def __str__(self) -> str:
    method set_description (line 192) | def set_description(self, description: str) -> None:
    method set_materialized (line 198) | def set_materialized(self, materialized: str) -> None:
    method compute (line 204) | def compute(self, instruction: str) -> Context:
    method search (line 221) | def search(self, search_query: str) -> Context:
  class TextFileContext (line 236) | class TextFileContext(Context):
    method __init__ (line 237) | def __init__(self, path: str, id: str, description: str) -> None:
    method _check_filter_answer_text (line 271) | def _check_filter_answer_text(self, answer_text: str) -> dict | None:
    method _parse_filter_answer (line 288) | def _parse_filter_answer(self, completion_text: str) -> dict[str, list]:
    method tool_execute_semantic_operators (line 347) | def tool_execute_semantic_operators(self, instruction: str) -> str:

FILE: src/palimpzest/core/data/context_manager.py
  class ContextNotFoundError (line 14) | class ContextNotFoundError(Exception):
  class ContextManager (line 18) | class ContextManager:
    method __init__ (line 24) | def __init__(self):
    method from_pkl (line 45) | def from_pkl(path: str) -> context.Context:
    method to_pkl (line 53) | def to_pkl(context: context.Context, path: str) -> None:
    method num_tokens_from_string (line 58) | def num_tokens_from_string(self, string: str, encoding_name: str) -> int:
    method add_context (line 64) | def add_context(self, context: context.Context, update: bool = False) ...
    method update_context (line 101) | def update_context(self, id: str, description: str, materialized: bool...
    method get_context (line 119) | def get_context(self, id: str) -> context.Context:
    method search_context (line 135) | def search_context(self, query: str, k: int = 1, where: dict | None = ...

FILE: src/palimpzest/core/data/dataset.py
  class Dataset (line 36) | class Dataset:
    method __init__ (line 66) | def __init__(
    method id (line 105) | def id(self) -> str:
    method schema (line 110) | def schema(self) -> type[BaseModel]:
    method is_root (line 115) | def is_root(self) -> bool:
    method __str__ (line 118) | def __str__(self) -> str:
    method __iter__ (line 121) | def __iter__(self) -> Iterator[Dataset]:
    method _compute_dataset_id (line 126) | def _compute_dataset_id(self) -> str:
    method _set_root_datasets (line 136) | def _set_root_datasets(self, new_root_datasets: dict[str, Dataset]) ->...
    method _generate_unique_logical_op_ids (line 154) | def _generate_unique_logical_op_ids(self, topo_idx: int | None = None)...
    method _resolve_depends_on (line 178) | def _resolve_depends_on(self, depends_on: list[str]) -> list[str]:
    method _get_root_datasets (line 184) | def _get_root_datasets(self) -> dict[str, Dataset]:
    method relax_types (line 196) | def relax_types(self) -> None:
    method get_upstream_datasets (line 211) | def get_upstream_datasets(self) -> list[Dataset]:
    method get_limit (line 222) | def get_limit(self) -> int | None:
    method copy (line 238) | def copy(self):
    method join (line 246) | def join(self, other: Dataset, on: str | list[str], how: str = "inner"...
    method sem_join (line 269) | def sem_join(self, other: Dataset, condition: str, desc: str | None = ...
    method filter (line 292) | def filter(
    method sem_filter (line 317) | def sem_filter(
    method _sem_map (line 340) | def _sem_map(self, cols: list[dict] | type[BaseModel] | None,
    method sem_add_columns (line 373) | def sem_add_columns(self, cols: list[dict] | type[BaseModel],
    method sem_map (line 402) | def sem_map(self, cols: list[dict] | type[BaseModel], desc: str | None...
    method sem_flat_map (line 416) | def sem_flat_map(self, cols: list[dict] | type[BaseModel], desc: str |...
    method _map (line 432) | def _map(self, udf: Callable,
    method add_columns (line 464) | def add_columns(self, udf: Callable,
    method map (line 502) | def map(self, udf: Callable,
    method flat_map (line 526) | def flat_map(self, udf: Callable,
    method count (line 550) | def count(self) -> Dataset:
    method average (line 555) | def average(self) -> Dataset:
    method sum (line 560) | def sum(self) -> Dataset:
    method min (line 565) | def min(self) -> Dataset:
    method max (line 570) | def max(self) -> Dataset:
    method groupby (line 575) | def groupby(self, groupby: GroupBySig) -> Dataset:
    method sem_agg (line 580) | def sem_agg(self, col: dict | type[BaseModel], agg: str, depends_on: s...
    method sem_topk (line 611) | def sem_topk(
    method limit (line 650) | def limit(self, n: int) -> Dataset:
    method distinct (line 655) | def distinct(self, distinct_cols: list[str] | None = None) -> Dataset:
    method project (line 660) | def project(self, project_cols: list[str] | str) -> Dataset:
    method run (line 667) | def run(self, config: QueryProcessorConfig | None = None, **kwargs):
    method optimize_and_run (line 682) | def optimize_and_run(self, config: QueryProcessorConfig | None = None,...

FILE: src/palimpzest/core/data/index_dataset.py
  function index_factory (line 8) | def index_factory(index: Collection) -> PZIndex:
  class BaseIndex (line 24) | class BaseIndex(ABC):
    method __init__ (line 26) | def __init__(self, index: Collection):
    method __str__ (line 29) | def __str__(self):
    method search (line 36) | def search(self, query_embedding: list[float] | list[list[float]], res...
  class ChromaIndex (line 53) | class ChromaIndex(BaseIndex):
    method __init__ (line 54) | def __init__(self, index: Collection):

FILE: src/palimpzest/core/data/iter_dataset.py
  class IterDataset (line 33) | class IterDataset(dataset.Dataset, ABC):
    method __init__ (line 42) | def __init__(self, id: str, schema: type[BaseModel] | list[dict]) -> N...
    method __len__ (line 55) | def __len__(self) -> int:
    method __getitem__ (line 60) | def __getitem__(self, idx: int) -> dict:
  class BaseFileDataset (line 79) | class BaseFileDataset(IterDataset):
    method __init__ (line 85) | def __init__(self, path: str, **kwargs) -> None:
    method __len__ (line 110) | def __len__(self) -> int:
  class BaseFileDirectoryDataset (line 114) | class BaseFileDirectoryDataset(IterDataset):
    method __init__ (line 120) | def __init__(self, path: str, **kwargs) -> None:
    method __len__ (line 146) | def __len__(self) -> int:
  class MemoryDataset (line 152) | class MemoryDataset(IterDataset):
    method __init__ (line 162) | def __init__(self, id: str, vals: list | pd.DataFrame, schema: type[Ba...
    method __len__ (line 177) | def __len__(self) -> int:
    method __getitem__ (line 180) | def __getitem__(self, idx: int) -> dict:
  class HTMLFileDataset (line 216) | class HTMLFileDataset(BaseFileDataset):
    method __init__ (line 221) | def __init__(self, id: str, path: str) -> None:
    method _html_to_text_with_links (line 232) | def _html_to_text_with_links(self, html: str) -> str:
    method __getitem__ (line 248) | def __getitem__(self, idx: int) -> dict:
  class ImageFileDataset (line 284) | class ImageFileDataset(BaseFileDataset):
    method __init__ (line 289) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 300) | def __getitem__(self, idx: int) -> dict:
  class PDFFileDataset (line 326) | class PDFFileDataset(BaseFileDataset):
    method __init__ (line 333) | def __init__(
    method __getitem__ (line 354) | def __getitem__(self, idx: int) -> dict:
  class TextFileDataset (line 385) | class TextFileDataset(BaseFileDataset):
    method __init__ (line 390) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 400) | def __getitem__(self, idx: int) -> dict:
  class XLSFileDataset (line 425) | class XLSFileDataset(BaseFileDataset):
    method __init__ (line 430) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 437) | def __getitem__(self, idx: int) -> dict:
  class AudioFileDataset (line 472) | class AudioFileDataset(BaseFileDirectoryDataset):
    method __init__ (line 477) | def __init__(self, id: str, path: str) -> None:
    method __getitem__ (line 488) | def __getitem__(self, idx: int) -> dict:
  function get_local_source (line 514) | def get_local_source(id: str, path: str | Path, **kwargs) -> dataset.Dat...
  function resolve_datasource (line 542) | def resolve_datasource(id: str, source: str | Path | list | pd.DataFrame...

FILE: src/palimpzest/core/elements/filters.py
  class Filter (line 11) | class Filter:
    method __init__ (line 14) | def __init__(self, filter_condition: str | None = None, filter_fn: Cal...
    method serialize (line 18) | def serialize(self) -> dict[str, Any]:
    method get_filter_str (line 24) | def get_filter_str(self) -> str:
    method __repr__ (line 27) | def __repr__(self) -> str:
    method __hash__ (line 30) | def __hash__(self) -> int:
    method __eq__ (line 34) | def __eq__(self, other) -> bool:
    method __str__ (line 42) | def __str__(self) -> str:

FILE: src/palimpzest/core/elements/groupbysig.py
  class GroupBySig (line 21) | class GroupBySig:
    method __init__ (line 22) | def __init__(self, group_by_fields: list[str], agg_funcs: list[str], a...
    method validate_schema (line 27) | def validate_schema(self, input_schema: type[BaseModel]) -> tuple[bool...
    method serialize (line 36) | def serialize(self) -> dict[str, Any]:
    method __str__ (line 44) | def __str__(self) -> str:
    method __hash__ (line 47) | def __hash__(self) -> int:
    method __eq__ (line 51) | def __eq__(self, other) -> bool:
    method get_agg_field_names (line 55) | def get_agg_field_names(self) -> list[str]:
    method output_schema (line 62) | def output_schema(self) -> type[BaseModel]:

FILE: src/palimpzest/core/elements/records.py
  class DataRecord (line 28) | class DataRecord:
    method __init__ (line 31) | def __init__(
    method __setattr__ (line 89) | def __setattr__(self, name: str, value: Any, /) -> None:
    method __getattr__ (line 96) | def __getattr__(self, name: str) -> Any:
    method __getitem__ (line 100) | def __getitem__(self, field: str) -> Any:
    method __setitem__ (line 104) | def __setitem__(self, field: str, value: Any) -> None:
    method __str__ (line 108) | def __str__(self, truncate: int | None = 15) -> str:
    method __repr__ (line 116) | def __repr__(self) -> str:
    method __eq__ (line 120) | def __eq__(self, other):
    method __hash__ (line 124) | def __hash__(self):
    method __iter__ (line 128) | def __iter__(self):
    method get_field_names (line 132) | def get_field_names(self):
    method get_field_type (line 136) | def get_field_type(self, field_name: str) -> FieldInfo:
    method schema (line 140) | def schema(self) -> type[BaseModel]:
    method copy (line 143) | def copy(self) -> DataRecord:
    method from_parent (line 164) | def from_parent(
    method from_agg_parents (line 208) | def from_agg_parents(
    method from_join_parents (line 229) | def from_join_parents(
    method to_df (line 278) | def to_df(records: list[DataRecord], project_cols: list[str] | None = ...
    method to_json_str (line 297) | def to_json_str(self, include_bytes: bool = True, bytes_to_str: bool =...
    method to_dict (line 302) | def to_dict(self, include_bytes: bool = True, bytes_to_str: bool = Fal...
  class DataRecordSet (line 343) | class DataRecordSet:
    method __init__ (line 351) | def __init__(
    method get_total_cost (line 378) | def get_total_cost(self) -> float:
    method get_field_to_score_fn (line 381) | def get_field_to_score_fn(self) -> dict[str, str | callable]:
    method __getitem__ (line 384) | def __getitem__(self, slice) -> DataRecord | list[DataRecord]:
    method __len__ (line 387) | def __len__(self) -> int:
    method __iter__ (line 390) | def __iter__(self) -> Generator[DataRecord]:
  class DataRecordCollection (line 394) | class DataRecordCollection:
    method __init__ (line 410) | def __init__(self, data_records: list[DataRecord], execution_stats: Ex...
    method __iter__ (line 416) | def __iter__(self) -> Generator[DataRecord]:
    method __len__ (line 420) | def __len__(self):
    method to_df (line 424) | def to_df(self, cols: list[str] | None = None):
    method _get_executed_plans (line 427) | def _get_executed_plans(self):

FILE: src/palimpzest/core/lib/schemas.py
  function get_schema_field_names (line 60) | def get_schema_field_names(schema: type[BaseModel], id: str | None = Non...
  function _create_pickleable_model (line 65) | def _create_pickleable_model(fields: dict[str, tuple[type, FieldInfo]]) ...
  function relax_schema (line 90) | def relax_schema(model: type[BaseModel]) -> type[BaseModel]:
  function project (line 99) | def project(model: type[BaseModel], project_fields: list[str]) -> type[B...
  function create_schema_from_fields (line 114) | def create_schema_from_fields(fields: list[dict]) -> type[BaseModel]:
  function create_schema_from_df (line 132) | def create_schema_from_df(df: pd.DataFrame) -> type[BaseModel]:
  function union_schemas (line 145) | def union_schemas(models: list[type[BaseModel]], join: bool = False, on:...
  class DefaultSchema (line 181) | class DefaultSchema(BaseModel):
  class Download (line 185) | class Download(BaseModel):
  class File (line 191) | class File(BaseModel):
  class TextFile (line 200) | class TextFile(BaseModel):
  class Average (line 205) | class Average(BaseModel):
  class Count (line 208) | class Count(BaseModel):
  class Sum (line 211) | class Sum(BaseModel):
  class Min (line 214) | class Min(BaseModel):
  class Max (line 217) | class Max(BaseModel):
  class OperatorDerivedSchema (line 220) | class OperatorDerivedSchema(BaseModel):
  class Table (line 223) | class Table(BaseModel):
  class URL (line 230) | class URL(BaseModel):
  class WebPage (line 234) | class WebPage(BaseModel):
  class ImageFile (line 242) | class ImageFile(File):
  class AudioFile (line 246) | class AudioFile(File):
  class PDFFile (line 250) | class PDFFile(File):
  class XLSFile (line 255) | class XLSFile(File):
  class EquationImage (line 261) | class EquationImage(ImageFile):
  class PlotImage (line 265) | class PlotImage(ImageFile):

FILE: src/palimpzest/core/models.py
  class GenerationStats (line 11) | class GenerationStats(BaseModel):
    method __iadd__ (line 59) | def __iadd__(self, other: GenerationStats) -> GenerationStats:
    method __add__ (line 66) | def __add__(self, other: GenerationStats) -> GenerationStats:
    method __itruediv__ (line 76) | def __itruediv__(self, quotient: float) -> GenerationStats:
    method __truediv__ (line 87) | def __truediv__(self, quotient: float) -> GenerationStats:
    method __radd__ (line 100) | def __radd__(self, other: int) -> GenerationStats:
    method to_json (line 106) | def to_json(self, filepath: str | None = None) -> dict | None:
  class RecordOpStats (line 114) | class RecordOpStats(BaseModel):
  class OperatorStats (line 227) | class OperatorStats(BaseModel):
    method __iadd__ (line 280) | def __iadd__(self, stats: OperatorStats | RecordOpStats) -> OperatorSt...
  class BasePlanStats (line 323) | class BasePlanStats(BaseModel):
    method start (line 385) | def start(self) -> None:
    method finish (line 389) | def finish(self) -> None:
    method from_plan (line 405) | def from_plan(plan) -> BasePlanStats:
    method sum_op_stats_field (line 412) | def sum_op_stats_field(self, field_name: str) -> float | int:
    method sum_validation_stats_field (line 416) | def sum_validation_stats_field(self, field_name: str) -> float | int:
    method add_record_op_stats (line 421) | def add_record_op_stats(self, unique_full_op_id: str, record_op_stats:...
    method __iadd__ (line 428) | def __iadd__(self, plan_stats: BasePlanStats) -> None:
    method __str__ (line 435) | def __str__(self) -> str:
    method get_total_cost_so_far (line 441) | def get_total_cost_so_far(self) -> float:
  class PlanStats (line 448) | class PlanStats(BasePlanStats):
    method from_plan (line 453) | def from_plan(plan) -> PlanStats:
    method sum_op_stats_field (line 471) | def sum_op_stats_field(self, field_name: str) -> float | int:
    method add_record_op_stats (line 475) | def add_record_op_stats(self, unique_full_op_id: str, record_op_stats:...
    method __iadd__ (line 489) | def __iadd__(self, plan_stats: PlanStats) -> None:
    method __str__ (line 512) | def __str__(self) -> str:
  class SentinelPlanStats (line 527) | class SentinelPlanStats(BasePlanStats):
    method from_plan (line 532) | def from_plan(plan) -> SentinelPlanStats:
    method sum_op_stats_field (line 552) | def sum_op_stats_field(self, field_name: str) -> float | int:
    method add_record_op_stats (line 556) | def add_record_op_stats(self, unique_logical_op_id: str, record_op_sta...
    method add_validation_gen_stats (line 574) | def add_validation_gen_stats(self, unique_logical_op_id: str, gen_stat...
    method __iadd__ (line 583) | def __iadd__(self, plan_stats: SentinelPlanStats) -> None:
    method __str__ (line 616) | def __str__(self) -> str:
  class ExecutionStats (line 635) | class ExecutionStats(BaseModel):
    method start (line 700) | def start(self) -> None:
    method finish_optimization (line 704) | def finish_optimization(self) -> None:
    method finish (line 717) | def finish(self) -> None:
    method sum_plan_stats_field (line 747) | def sum_plan_stats_field(self, field_name: str) -> float | int:
    method sum_sentinel_plan_costs (line 755) | def sum_sentinel_plan_costs(self) -> float:
    method sum_plan_costs (line 764) | def sum_plan_costs(self) -> float:
    method add_plan_stats (line 770) | def add_plan_stats(self, plan_stats: PlanStats | SentinelPlanStats | l...
    method to_json (line 794) | def to_json(self, filepath: str | None = None) -> dict | None:
  class OperatorCostEstimates (line 802) | class OperatorCostEstimates(BaseModel):
    method __rmul__ (line 843) | def __rmul__(self, multiplier: float) -> OperatorCostEstimates:
    method model_post_init (line 850) | def model_post_init(self, __context: Any) -> None:
  class PlanCost (line 868) | class PlanCost(BaseModel):
    method __hash__ (line 903) | def __hash__(self):
    method __eq__ (line 906) | def __eq__(self, other: Any) -> bool:
    method model_post_init (line 915) | def model_post_init(self, __context: Any) -> None:
    method join_add (line 928) | def join_add(self, left_plan_cost: PlanCost, right_plan_cost: PlanCost...
    method __iadd__ (line 969) | def __iadd__(self, other: PlanCost) -> PlanCost:
    method __add__ (line 989) | def __add__(self, other: PlanCost) -> PlanCost:

FILE: src/palimpzest/policy.py
  function construct_policy_from_kwargs (line 8) | def construct_policy_from_kwargs(**kwargs) -> Policy | None:
  class Policy (line 64) | class Policy:
    method __init__ (line 73) | def __init__(self):
    method get_primary_metric (line 76) | def get_primary_metric(self) -> str:
    method get_dict (line 87) | def get_dict(self) -> dict:
    method constraint (line 94) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 101) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
    method to_json_str (line 107) | def to_json_str(self) -> str:
  class MaxQuality (line 115) | class MaxQuality(Policy):
    method __str__ (line 121) | def __str__(self):
    method get_primary_metric (line 124) | def get_primary_metric(self) -> str:
    method get_dict (line 127) | def get_dict(self) -> dict:
    method constraint (line 130) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 134) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinCost (line 147) | class MinCost(Policy):
    method __str__ (line 153) | def __str__(self):
    method get_primary_metric (line 156) | def get_primary_metric(self) -> str:
    method get_dict (line 159) | def get_dict(self) -> dict:
    method constraint (line 162) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 166) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinTime (line 179) | class MinTime(Policy):
    method __str__ (line 185) | def __str__(self):
    method get_primary_metric (line 188) | def get_primary_metric(self) -> str:
    method get_dict (line 191) | def get_dict(self) -> dict:
    method constraint (line 194) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 198) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MaxQualityAtFixedCost (line 211) | class MaxQualityAtFixedCost(Policy):
    method __init__ (line 217) | def __init__(self, max_cost: float):
    method __str__ (line 220) | def __str__(self):
    method get_primary_metric (line 223) | def get_primary_metric(self) -> str:
    method get_dict (line 226) | def get_dict(self) -> dict:
    method constraint (line 229) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 232) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MaxQualityAtFixedTime (line 245) | class MaxQualityAtFixedTime(Policy):
    method __init__ (line 251) | def __init__(self, max_time: float):
    method __str__ (line 254) | def __str__(self):
    method get_primary_metric (line 257) | def get_primary_metric(self) -> str:
    method get_dict (line 260) | def get_dict(self) -> dict:
    method constraint (line 263) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 266) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinCostAtFixedQuality (line 279) | class MinCostAtFixedQuality(Policy):
    method __init__ (line 285) | def __init__(self, min_quality: float):
    method __str__ (line 288) | def __str__(self):
    method get_primary_metric (line 291) | def get_primary_metric(self) -> str:
    method get_dict (line 294) | def get_dict(self) -> dict:
    method constraint (line 297) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 300) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:
  class MinTimeAtFixedQuality (line 313) | class MinTimeAtFixedQuality(Policy):
    method __init__ (line 319) | def __init__(self, min_quality: float):
    method __str__ (line 322) | def __str__(self):
    method get_primary_metric (line 325) | def get_primary_metric(self) -> str:
    method get_dict (line 328) | def get_dict(self) -> dict:
    method constraint (line 331) | def constraint(self, plan: PlanCost) -> bool:
    method choose (line 334) | def choose(self, plan: PlanCost, other_plan: PlanCost) -> float:

FILE: src/palimpzest/prompts/prompt_factory.py
  function _detect_image_media_type (line 144) | def _detect_image_media_type(filepath: str | None = None, base64_data: s...
  class PromptFactory (line 170) | class PromptFactory:
    method __init__ (line 232) | def __init__(self, prompt_strategy: PromptStrategy, model: Model, card...
    method _get_context (line 238) | def _get_context(self, candidate: DataRecord | list[DataRecord], input...
    method _get_input_fields (line 289) | def _get_input_fields(self, candidate: DataRecord, **kwargs) -> list[s...
    method _get_input_modalities (line 308) | def _get_input_modalities(self, candidate: DataRecord, input_fields: l...
    method _get_modalities_str (line 331) | def _get_modalities_str(self, input_modalities: set[Modality]) -> str:
    method _get_input_fields_desc (line 356) | def _get_input_fields_desc(self, candidate: DataRecord, input_fields: ...
    method _get_output_fields_desc (line 372) | def _get_output_fields_desc(self, output_fields: list[str], **kwargs) ...
    method _get_agg_instruction (line 395) | def _get_agg_instruction(self, **kwargs) -> str | None:
    method _get_filter_condition (line 408) | def _get_filter_condition(self, **kwargs) -> str | None:
    method _get_join_condition (line 421) | def _get_join_condition(self, **kwargs) -> str | None:
    method _get_original_output (line 434) | def _get_original_output(self, **kwargs) -> str | None:
    method _get_critique_output (line 452) | def _get_critique_output(self, **kwargs) -> str | None:
    method _get_model_responses (line 468) | def _get_model_responses(self, **kwargs) -> str | None:
    method _get_chunk_outputs (line 487) | def _get_chunk_outputs(self, **kwargs) -> str | None:
    method _get_output_format_instruction (line 506) | def _get_output_format_instruction(self) -> str:
    method _get_job_instruction (line 519) | def _get_job_instruction(self, input_modalities: set[Modality]) -> str...
    method _get_desc_section (line 549) | def _get_desc_section(self) -> str:
    method _get_critique_criteria (line 562) | def _get_critique_criteria(self) -> str | None:
    method _get_refinement_criteria (line 575) | def _get_refinement_criteria(self) -> str | None:
    method _get_finish_instruction (line 588) | def _get_finish_instruction(self) -> str | None:
    method _get_example_input_fields (line 603) | def _get_example_input_fields(self, input_modalities: set[Modality], r...
    method _get_example_output_fields (line 627) | def _get_example_output_fields(self, input_modalities: set[Modality]) ...
    method _get_example_context (line 650) | def _get_example_context(self, input_modalities: set[Modality], right:...
    method _get_image_disclaimer (line 684) | def _get_image_disclaimer(self, input_modalities: set[Modality], right...
    method _get_audio_disclaimer (line 697) | def _get_audio_disclaimer(self, input_modalities: set[Modality], right...
    method _get_example_reasoning (line 710) | def _get_example_reasoning(self, input_modalities: set[Modality]) -> str:
    method _get_example_answer (line 737) | def _get_example_answer(self, input_modalities: set[Modality]) -> str:
    method _get_all_format_kwargs (line 763) | def _get_all_format_kwargs(
    method _create_audio_messages (line 837) | def _create_audio_messages(self, candidate: DataRecord | list[DataReco...
    method _create_image_messages (line 893) | def _create_image_messages(self, candidate: DataRecord | list[DataReco...
    method _get_system_prompt (line 963) | def _get_system_prompt(self, **format_kwargs) -> str | None:
    method _get_user_messages (line 980) | def _get_user_messages(self, candidate: DataRecord | list[DataRecord],...
    method create_messages (line 1074) | def create_messages(self, candidate: DataRecord | list[DataRecord], ou...

FILE: src/palimpzest/prompts/prompt_manager.py
  class PromptManager (line 17) | class PromptManager:
    method __init__ (line 30) | def __init__(self, model: Model):
    method get_cache_kwargs (line 35) | def get_cache_kwargs(self) -> dict[str, Any]:
    method inject_cache_isolation_id (line 51) | def inject_cache_isolation_id(self, messages: list[dict], session_id: ...
    method update_messages_for_caching (line 65) | def update_messages_for_caching(self, messages: list[dict]) -> list[di...
    method extract_usage_stats (line 92) | def extract_usage_stats(self, usage: dict, is_audio_op: bool) -> dict[...
    method _remove_cache_boundary_markers (line 149) | def _remove_cache_boundary_markers(self, messages: list[dict]) -> list...
    method _transform_messages_for_anthropic (line 173) | def _transform_messages_for_anthropic(self, messages: list[dict]) -> l...

FILE: src/palimpzest/query/execution/all_sample_execution_strategy.py
  class OpSet (line 20) | class OpSet:
    method __init__ (line 29) | def __init__(self, op_set: list[PhysicalOperator], source_unique_logic...
    method get_op_inputs (line 48) | def get_op_inputs(self) -> list[PhysicalOperator, DataRecord | int | N...
    method pick_highest_quality_output (line 103) | def pick_highest_quality_output(self, record_sets: list[DataRecordSet]...
    method update_inputs (line 141) | def update_inputs(self, source_idx_to_record_sets: dict[int, DataRecor...
  class AllSamplingExecutionStrategy (line 153) | class AllSamplingExecutionStrategy(SentinelExecutionStrategy):
    method _execute_sentinel_plan (line 155) | def _execute_sentinel_plan(self,
    method execute_sentinel_plan (line 208) | def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dic...

FILE: src/palimpzest/query/execution/execution_strategy.py
  class BaseExecutionStrategy (line 25) | class BaseExecutionStrategy:
    method __init__ (line 26) | def __init__(self,
  class ExecutionStrategy (line 43) | class ExecutionStrategy(BaseExecutionStrategy, ABC):
    method __init__ (line 46) | def __init__(self, *args, **kwargs):
    method execute_plan (line 52) | def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], ...
    method _create_input_queues (line 56) | def _create_input_queues(self, plan: PhysicalPlan) -> dict[str, dict[s...
  class SentinelExecutionStrategy (line 77) | class SentinelExecutionStrategy(BaseExecutionStrategy, ABC):
    method __init__ (line 83) | def __init__(
    method _score_quality (line 117) | def _score_quality(
    method _execute_op_set (line 275) | def _execute_op_set(self, unique_logical_op_id: str, op_inputs: list[t...
    method _is_llm_op (line 345) | def _is_llm_op(self, physical_op: PhysicalOperator) -> bool:
    method execute_sentinel_plan (line 353) | def execute_sentinel_plan(self, sentinel_plan: SentinelPlan, train_dat...

FILE: src/palimpzest/query/execution/execution_strategy_type.py
  class ExecutionStrategyType (line 12) | class ExecutionStrategyType(Enum):
    method is_fully_parallel (line 18) | def is_fully_parallel(self) -> bool:
  class SentinelExecutionStrategyType (line 22) | class SentinelExecutionStrategyType(Enum):

FILE: src/palimpzest/query/execution/mab_execution_strategy.py
  class OpFrontier (line 27) | class OpFrontier:
    method __init__ (line 36) | def __init__(
    method get_frontier_ops (line 96) | def get_frontier_ops(self) -> list[PhysicalOperator]:
    method get_off_frontier_ops (line 102) | def get_off_frontier_ops(self) -> list[PhysicalOperator]:
    method _compute_op_id_to_pareto_distance (line 108) | def _compute_op_id_to_pareto_distance(self, priors: dict[str, dict[str...
    method _compute_naive_priors (line 168) | def _compute_naive_priors(self, op_set: list[PhysicalOperator]) -> dic...
    method _get_op_index_order (line 191) | def _get_op_index_order(self, op_set: list[PhysicalOperator], seed: in...
    method _get_op_source_indices_pairs (line 258) | def _get_op_source_indices_pairs(self) -> list[tuple[PhysicalOperator,...
    method get_source_indices_for_next_iteration (line 282) | def get_source_indices_for_next_iteration(self) -> set[tuple[str]]:
    method get_frontier_op_inputs (line 289) | def get_frontier_op_inputs(self, source_indices_to_sample: set[tuple[s...
    method update_frontier (line 369) | def update_frontier(self, unique_logical_op_id: str, plan_stats: Senti...
    method pick_highest_quality_output (line 581) | def pick_highest_quality_output(self, record_sets: list[DataRecordSet]...
    method update_inputs (line 619) | def update_inputs(self, source_unique_logical_op_id: str, source_indic...
  class MABExecutionStrategy (line 631) | class MABExecutionStrategy(SentinelExecutionStrategy):
    method _remove_filtered_records_from_downstream_ops (line 639) | def _remove_filtered_records_from_downstream_ops(self, topo_idx: int, ...
    method _get_max_quality_op (line 661) | def _get_max_quality_op(self, unique_logical_op_id: str, op_frontiers:...
    method _compute_termination_condition (line 692) | def _compute_termination_condition(self, samples_drawn: int, sampling_...
    method _execute_sentinel_plan (line 695) | def _execute_sentinel_plan(
    method execute_sentinel_plan (line 791) | def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dic...

FILE: src/palimpzest/query/execution/parallel_execution_strategy.py
  class ParallelExecutionStrategy (line 19) | class ParallelExecutionStrategy(ExecutionStrategy):
    method __init__ (line 24) | def __init__(self, *args, **kwargs):
    method _any_queue_not_empty (line 27) | def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dic...
    method _upstream_ops_finished (line 37) | def _upstream_ops_finished(self, plan: PhysicalPlan, unique_full_op_id...
    method _finish_outer_join (line 44) | def _finish_outer_join(self, executor: ThreadPoolExecutor, plan: Physi...
    method _process_future_results (line 58) | def _process_future_results(self, unique_full_op_id: str, future_queue...
    method _execute_plan (line 101) | def _execute_plan(
    method execute_plan (line 235) | def execute_plan(self, plan: PhysicalPlan):

FILE: src/palimpzest/query/execution/single_threaded_execution_strategy.py
  class SequentialSingleThreadExecutionStrategy (line 15) | class SequentialSingleThreadExecutionStrategy(ExecutionStrategy):
    method __init__ (line 25) | def __init__(self, *args, **kwargs):
    method _execute_plan (line 29) | def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, di...
    method execute_plan (line 116) | def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], ...
  class PipelinedSingleThreadExecutionStrategy (line 149) | class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy):
    method __init__ (line 163) | def __init__(self, *args, **kwargs):
    method _any_queue_not_empty (line 167) | def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dic...
    method _upstream_ops_finished (line 177) | def _upstream_ops_finished(self, plan: PhysicalPlan, unique_full_op_id...
    method _execute_plan (line 184) | def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, di...
    method execute_plan (line 284) | def execute_plan(self, plan: PhysicalPlan):

FILE: src/palimpzest/query/generators/gemini_client.py
  class GeminiResponse (line 24) | class GeminiResponse:
  class GeminiClient (line 31) | class GeminiClient:
    method get_instance (line 57) | def get_instance(cls, model: str, use_vertex: bool = False) -> GeminiC...
    method __init__ (line 64) | def __init__(self, model: str, use_vertex: bool = False):
    method _detect_image_media_type (line 70) | def _detect_image_media_type(self, base64_data: str) -> str:
    method _transform_messages (line 86) | def _transform_messages(self, messages: list[dict]) -> tuple[str | Non...
    method _extract_usage_stats (line 182) | def _extract_usage_stats(self, usage_metadata: Any) -> dict:
    method generate (line 243) | def generate(

FILE: src/palimpzest/query/generators/generators.py
  function get_json_from_answer (line 34) | def get_json_from_answer(answer: str, model: Model, cardinality: Cardina...
  class Generator (line 98) | class Generator(Generic[ContextType, InputType]):
    method __init__ (line 103) | def __init__(
    method _parse_reasoning (line 131) | def _parse_reasoning(self, completion_text: str, **kwargs) -> str:
    method _prepare_field_answers (line 149) | def _prepare_field_answers(self, field_answers: dict | list[dict], fie...
    method _check_convert_answer_text (line 171) | def _check_convert_answer_text(self, answer_text: str, fields: dict[st...
    method _check_bool_answer_text (line 188) | def _check_bool_answer_text(self, answer_text: str, throw_exception: b...
    method _parse_convert_answer (line 206) | def _parse_convert_answer(self, completion_text: str, fields: dict[str...
    method _parse_bool_answer (line 245) | def _parse_bool_answer(self, completion_text: str, json_output: bool) ...
    method _parse_answer (line 284) | def _parse_answer(self, completion_text: str, fields: dict[str, FieldI...
    method __call__ (line 303) | def __call__(self, candidate: DataRecord | list[DataRecord], fields: d...

FILE: src/palimpzest/query/operators/aggregate.py
  class AggregateOp (line 23) | class AggregateOp(PhysicalOperator):
    method __call__ (line 29) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class ApplyGroupByOp (line 33) | class ApplyGroupByOp(AggregateOp):
    method __init__ (line 39) | def __init__(self, group_by_sig: GroupBySig, *args, **kwargs):
    method __str__ (line 43) | def __str__(self):
    method get_id_params (line 48) | def get_id_params(self):
    method get_op_params (line 52) | def get_op_params(self):
    method naive_cost_estimates (line 56) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method agg_init (line 66) | def agg_init(func):
    method agg_merge (line 85) | def agg_merge(func, state, val):
    method agg_final (line 115) | def agg_final(func, state):
    method __call__ (line 124) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class AverageAggregateOp (line 192) | class AverageAggregateOp(AggregateOp):
    method __init__ (line 195) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 215) | def __str__(self):
    method get_id_params (line 220) | def get_id_params(self):
    method get_op_params (line 224) | def get_op_params(self):
    method naive_cost_estimates (line 228) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 237) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class SumAggregateOp (line 271) | class SumAggregateOp(AggregateOp):
    method __init__ (line 274) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 294) | def __str__(self):
    method get_id_params (line 299) | def get_id_params(self):
    method get_op_params (line 303) | def get_op_params(self):
    method naive_cost_estimates (line 307) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 316) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class CountAggregateOp (line 347) | class CountAggregateOp(AggregateOp):
    method __init__ (line 350) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 358) | def __str__(self):
    method get_id_params (line 363) | def get_id_params(self):
    method get_op_params (line 367) | def get_op_params(self):
    method naive_cost_estimates (line 371) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 380) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class MinAggregateOp (line 404) | class MinAggregateOp(AggregateOp):
    method __init__ (line 407) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 415) | def __str__(self):
    method get_id_params (line 420) | def get_id_params(self):
    method get_op_params (line 424) | def get_op_params(self):
    method naive_cost_estimates (line 428) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 437) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class MaxAggregateOp (line 467) | class MaxAggregateOp(AggregateOp):
    method __init__ (line 470) | def __init__(self, agg_func: AggFunc, *args, **kwargs):
    method __str__ (line 478) | def __str__(self):
    method get_id_params (line 483) | def get_id_params(self):
    method get_op_params (line 487) | def get_op_params(self):
    method naive_cost_estimates (line 491) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 500) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:
  class SemanticAggregate (line 531) | class SemanticAggregate(AggregateOp):
    method __init__ (line 533) | def __init__(self, agg_str: str, model: Model, prompt_strategy: Prompt...
    method __str__ (line 543) | def __str__(self):
    method get_id_params (line 550) | def get_id_params(self):
    method get_op_params (line 562) | def get_op_params(self):
    method get_model_name (line 574) | def get_model_name(self) -> str:
    method naive_cost_estimates (line 577) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 611) | def __call__(self, candidates: list[DataRecord]) -> DataRecordSet:

FILE: src/palimpzest/query/operators/compute.py
  function make_tool (line 17) | def make_tool(bound_method):
  class SmolAgentsCompute (line 38) | class SmolAgentsCompute(PhysicalOperator):
    method __init__ (line 41) | def __init__(self, context_id: str, instruction: str, additional_conte...
    method __str__ (line 52) | def __str__(self):
    method get_id_params (line 59) | def get_id_params(self):
    method get_op_params (line 68) | def get_op_params(self):
    method naive_cost_estimates (line 77) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method _create_record_set (line 85) | def _create_record_set(
    method __call__ (line 129) | def __call__(self, candidate: DataRecord) -> Any:

FILE: src/palimpzest/query/operators/convert.py
  class ConvertOp (line 23) | class ConvertOp(PhysicalOperator, ABC):
    method __init__ (line 24) | def __init__(
    method get_id_params (line 37) | def get_id_params(self):
    method get_op_params (line 48) | def get_op_params(self):
    method _create_data_records_from_field_answers (line 59) | def _create_data_records_from_field_answers(
    method _create_record_set (line 90) | def _create_record_set(
    method convert (line 142) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
    method __call__ (line 163) | def __call__(self, candidate: DataRecord) -> DataRecordSet:
  class NonLLMConvert (line 198) | class NonLLMConvert(ConvertOp):
    method __str__ (line 199) | def __str__(self):
    method naive_cost_estimates (line 204) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method convert (line 224) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class LLMConvert (line 262) | class LLMConvert(ConvertOp):
    method __init__ (line 267) | def __init__(
    method __str__ (line 282) | def __str__(self):
    method get_id_params (line 288) | def get_id_params(self):
    method get_op_params (line 299) | def get_op_params(self):
    method get_model_name (line 310) | def get_model_name(self):
    method naive_cost_estimates (line 313) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
  class LLMConvertBonded (line 352) | class LLMConvertBonded(LLMConvert):
    method convert (line 354) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...

FILE: src/palimpzest/query/operators/critique_and_refine.py
  class CritiqueAndRefineConvert (line 18) | class CritiqueAndRefineConvert(LLMConvert):
    method __init__ (line 20) | def __init__(
    method __str__ (line 35) | def __str__(self):
    method get_id_params (line 41) | def get_id_params(self):
    method get_op_params (line 51) | def get_op_params(self):
    method naive_cost_estimates (line 61) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method convert (line 81) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class CritiqueAndRefineFilter (line 106) | class CritiqueAndRefineFilter(LLMFilter):
    method __init__ (line 108) | def __init__(
    method __str__ (line 123) | def __str__(self):
    method get_id_params (line 129) | def get_id_params(self):
    method get_op_params (line 139) | def get_op_params(self):
    method naive_cost_estimates (line 149) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 169) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/distinct.py
  class DistinctOp (line 8) | class DistinctOp(PhysicalOperator):
    method __init__ (line 9) | def __init__(self, distinct_cols: list[str], distinct_seen: set | None...
    method __str__ (line 14) | def __str__(self):
    method get_id_params (line 19) | def get_id_params(self):
    method get_op_params (line 23) | def get_op_params(self):
    method naive_cost_estimates (line 27) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 36) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/filter.py
  class FilterOp (line 23) | class FilterOp(PhysicalOperator, ABC):
    method __init__ (line 24) | def __init__(self, filter: Filter, desc: str | None = None, *args, **k...
    method __str__ (line 30) | def __str__(self):
    method get_id_params (line 35) | def get_id_params(self):
    method get_op_params (line 39) | def get_op_params(self):
    method filter (line 44) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...
    method _create_record_set (line 60) | def _create_record_set(
    method __call__ (line 107) | def __call__(self, candidate: DataRecord) -> DataRecordSet:
  class NonLLMFilter (line 125) | class NonLLMFilter(FilterOp):
    method naive_cost_estimates (line 127) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 143) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...
  class LLMFilter (line 165) | class LLMFilter(FilterOp):
    method __init__ (line 166) | def __init__(
    method get_id_params (line 181) | def get_id_params(self):
    method get_op_params (line 192) | def get_op_params(self):
    method get_model_name (line 203) | def get_model_name(self):
    method naive_cost_estimates (line 206) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 247) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/join.py
  class Singleton (line 29) | class Singleton:
    method __new__ (line 30) | def __new__(cls, *args, **kw):
  class Locks (line 36) | class Locks(Singleton):
    method get_model (line 42) | def get_model(cls, model_name: str):
  function compute_similarity (line 48) | def compute_similarity(left_embedding: list[float], right_embedding: lis...
  class JoinOp (line 55) | class JoinOp(PhysicalOperator, ABC):
    method __init__ (line 56) | def __init__(
    method __str__ (line 86) | def __str__(self):
    method get_id_params (line 93) | def get_id_params(self):
    method get_op_params (line 105) | def get_op_params(self):
    method _compute_unmatched_records (line 118) | def _compute_unmatched_records(self) -> DataRecordSet:
    method naive_cost_estimates (line 172) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method set_finished (line 175) | def set_finished(self):
  class RelationalJoin (line 179) | class RelationalJoin(JoinOp):
    method get_model_name (line 181) | def get_model_name(self):
    method _process_join_candidate_pair (line 184) | def _process_join_candidate_pair(self, left_candidate, right_candidate...
    method naive_cost_estimates (line 228) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method __call__ (line 243) | def __call__(self, left_candidates: list[DataRecord], right_candidates...
  class LLMJoin (line 290) | class LLMJoin(JoinOp):
    method __init__ (line 291) | def __init__(
    method __str__ (line 305) | def __str__(self):
    method get_id_params (line 312) | def get_id_params(self):
    method get_op_params (line 322) | def get_op_params(self):
    method get_model_name (line 332) | def get_model_name(self):
    method _process_join_candidate_pair (line 335) | def _process_join_candidate_pair(
  class NestedLoopsJoin (line 395) | class NestedLoopsJoin(LLMJoin):
    method naive_cost_estimates (line 397) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method __call__ (line 439) | def __call__(self, left_candidates: list[DataRecord], right_candidates...
  class EmbeddingJoin (line 492) | class EmbeddingJoin(LLMJoin):
    method __init__ (line 495) | def __init__(
    method __str__ (line 529) | def __str__(self):
    method get_id_params (line 535) | def get_id_params(self):
    method get_op_params (line 545) | def get_op_params(self):
    method naive_cost_estimates (line 555) | def naive_cost_estimates(self, left_source_op_cost_estimates: Operator...
    method _compute_embeddings (line 588) | def _compute_embeddings(self, candidates: list[DataRecord], input_fiel...
    method _process_join_candidate_pair (line 636) | def _process_join_candidate_pair(self, left_candidate, right_candidate...
    method _process_join_candidate_with_sim (line 640) | def _process_join_candidate_with_sim(self, left_candidate: DataRecord,...
    method __call__ (line 675) | def __call__(self, left_candidates: list[DataRecord], right_candidates...

FILE: src/palimpzest/query/operators/limit.py
  class LimitScanOp (line 8) | class LimitScanOp(PhysicalOperator):
    method __init__ (line 9) | def __init__(self, limit: int, *args, **kwargs):
    method __str__ (line 13) | def __str__(self):
    method get_id_params (line 18) | def get_id_params(self):
    method get_op_params (line 22) | def get_op_params(self):
    method naive_cost_estimates (line 26) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 35) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/logical.py
  class LogicalOperator (line 16) | class LogicalOperator:
    method __init__ (line 38) | def __init__(
    method __str__ (line 57) | def __str__(self) -> str:
    method __eq__ (line 60) | def __eq__(self, other) -> bool:
    method copy (line 64) | def copy(self) -> LogicalOperator:
    method logical_op_name (line 70) | def logical_op_name(self) -> str:
    method get_unique_logical_op_id (line 74) | def get_unique_logical_op_id(self) -> str:
    method set_unique_logical_op_id (line 80) | def set_unique_logical_op_id(self, unique_logical_op_id: str) -> None:
    method get_logical_id_params (line 87) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 99) | def get_logical_op_params(self) -> dict:
    method get_logical_op_id (line 112) | def get_logical_op_id(self):
    method get_generated_fields (line 134) | def get_generated_fields(self) -> list[str]:
    method __hash__ (line 138) | def __hash__(self):
  class Aggregate (line 144) | class Aggregate(LogicalOperator):
    method __init__ (line 150) | def __init__(
    method __str__ (line 176) | def __str__(self):
    method get_logical_id_params (line 180) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 190) | def get_logical_op_params(self) -> dict:
  class BaseScan (line 201) | class BaseScan(LogicalOperator):
    method __init__ (line 204) | def __init__(self, datasource: dataset.Dataset, output_schema: type[Ba...
    method __str__ (line 208) | def __str__(self):
    method __eq__ (line 211) | def __eq__(self, other) -> bool:
    method get_logical_id_params (line 219) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 228) | def get_logical_op_params(self) -> dict:
  class ContextScan (line 235) | class ContextScan(LogicalOperator):
    method __init__ (line 238) | def __init__(self, context: context.Context, output_schema: type[BaseM...
    method __str__ (line 242) | def __str__(self):
    method __eq__ (line 245) | def __eq__(self, other) -> bool:
    method get_logical_id_params (line 251) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 260) | def get_logical_op_params(self) -> dict:
  class ConvertScan (line 267) | class ConvertScan(LogicalOperator):
    method __init__ (line 270) | def __init__(
    method __str__ (line 283) | def __str__(self):
    method get_logical_id_params (line 286) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 297) | def get_logical_op_params(self) -> dict:
  class Distinct (line 309) | class Distinct(LogicalOperator):
    method __init__ (line 310) | def __init__(self, distinct_cols: list[str] | None, *args, **kwargs):
    method __str__ (line 324) | def __str__(self):
    method get_logical_id_params (line 327) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 333) | def get_logical_op_params(self) -> dict:
  class FilteredScan (line 343) | class FilteredScan(LogicalOperator):
    method __init__ (line 346) | def __init__(
    method __str__ (line 357) | def __str__(self):
    method get_logical_id_params (line 360) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 370) | def get_logical_op_params(self) -> dict:
  class GroupByAggregate (line 381) | class GroupByAggregate(LogicalOperator):
    method __init__ (line 382) | def __init__(
    method __str__ (line 396) | def __str__(self):
    method get_logical_id_params (line 399) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 405) | def get_logical_op_params(self) -> dict:
  class JoinOp (line 415) | class JoinOp(LogicalOperator):
    method __init__ (line 416) | def __init__(self, condition: str, on: list[str] | None = None, how: s...
    method __str__ (line 423) | def __str__(self):
    method get_logical_id_params (line 426) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 438) | def get_logical_op_params(self) -> dict:
  class LimitScan (line 451) | class LimitScan(LogicalOperator):
    method __init__ (line 452) | def __init__(self, limit: int, *args, **kwargs):
    method __str__ (line 456) | def __str__(self):
    method get_logical_id_params (line 459) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 465) | def get_logical_op_params(self) -> dict:
  class Project (line 475) | class Project(LogicalOperator):
    method __init__ (line 476) | def __init__(self, project_cols: list[str], *args, **kwargs):
    method __str__ (line 480) | def __str__(self):
    method get_logical_id_params (line 483) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 489) | def get_logical_op_params(self) -> dict:
  class TopKScan (line 499) | class TopKScan(LogicalOperator):
    method __init__ (line 502) | def __init__(
    method __str__ (line 519) | def __str__(self):
    method get_logical_id_params (line 522) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 536) | def get_logical_op_params(self) -> dict:
  class ComputeOperator (line 550) | class ComputeOperator(LogicalOperator):
    method __init__ (line 556) | def __init__(self, context_id: str, instruction: str, *args, **kwargs):
    method __str__ (line 561) | def __str__(self):
    method get_logical_id_params (line 564) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 574) | def get_logical_op_params(self) -> dict:
  class SearchOperator (line 585) | class SearchOperator(LogicalOperator):
    method __init__ (line 591) | def __init__(self, context_id: str, search_query: str, *args, **kwargs):
    method __str__ (line 596) | def __str__(self):
    method get_logical_id_params (line 599) | def get_logical_id_params(self) -> dict:
    method get_logical_op_params (line 609) | def get_logical_op_params(self) -> dict:

FILE: src/palimpzest/query/operators/mixture_of_agents.py
  class MixtureOfAgentsConvert (line 16) | class MixtureOfAgentsConvert(LLMConvert):
    method __init__ (line 18) | def __init__(
    method __str__ (line 41) | def __str__(self):
    method get_id_params (line 48) | def get_id_params(self):
    method get_op_params (line 59) | def get_op_params(self):
    method naive_cost_estimates (line 70) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method convert (line 106) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class MixtureOfAgentsFilter (line 133) | class MixtureOfAgentsFilter(LLMFilter):
    method __init__ (line 135) | def __init__(
    method __str__ (line 158) | def __str__(self):
    method get_id_params (line 165) | def get_id_params(self):
    method get_op_params (line 176) | def get_op_params(self):
    method naive_cost_estimates (line 187) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method filter (line 221) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/physical.py
  class PhysicalOperator (line 14) | class PhysicalOperator:
    method __init__ (line 21) | def __init__(
    method __str__ (line 71) | def __str__(self):
    method __eq__ (line 82) | def __eq__(self, other) -> bool:
    method copy (line 85) | def copy(self) -> PhysicalOperator:
    method op_name (line 88) | def op_name(self) -> str:
    method get_id_params (line 92) | def get_id_params(self) -> dict:
    method get_op_params (line 106) | def get_op_params(self) -> dict:
    method get_op_id (line 123) | def get_op_id(self):
    method get_logical_op_id (line 148) | def get_logical_op_id(self) -> str:
    method get_unique_logical_op_id (line 151) | def get_unique_logical_op_id(self) -> str:
    method get_full_op_id (line 154) | def get_full_op_id(self):
    method is_image_op (line 157) | def is_image_op(self) -> bool:
    method is_audio_op (line 161) | def is_audio_op(self) -> bool:
    method __hash__ (line 165) | def __hash__(self):
    method get_model_name (line 168) | def get_model_name(self) -> str | None:
    method get_input_fields (line 172) | def get_input_fields(self):
    method get_fields_to_generate (line 187) | def get_fields_to_generate(self, candidate: DataRecord) -> list[str]:
    method naive_cost_estimates (line 204) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 224) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/project.py
  class ProjectOp (line 8) | class ProjectOp(PhysicalOperator):
    method __init__ (line 9) | def __init__(self, project_cols: list[str], *args, **kwargs):
    method __str__ (line 13) | def __str__(self):
    method get_id_params (line 18) | def get_id_params(self):
    method get_op_params (line 22) | def get_op_params(self):
    method naive_cost_estimates (line 26) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method __call__ (line 35) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/operators/rag.py
  class RAGConvert (line 18) | class RAGConvert(LLMConvert):
    method __init__ (line 19) | def __init__(self, embedding_model: Model, num_chunks_per_field: int, ...
    method __str__ (line 28) | def __str__(self):
    method get_id_params (line 35) | def get_id_params(self):
    method get_op_params (line 45) | def get_op_params(self):
    method naive_cost_estimates (line 55) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method chunk_text (line 84) | def chunk_text(self, text: str, chunk_size: int) -> list[str]:
    method compute_embedding (line 99) | def compute_embedding(self, text: str) -> tuple[list[float], Generatio...
    method compute_similarity (line 129) | def compute_similarity(self, query_embedding: list[float], chunk_embed...
    method get_chunked_candidate (line 135) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method convert (line 195) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class RAGFilter (line 229) | class RAGFilter(LLMFilter):
    method __init__ (line 230) | def __init__(self, embedding_model: Model, num_chunks_per_field: int, ...
    method __str__ (line 239) | def __str__(self):
    method get_id_params (line 246) | def get_id_params(self):
    method get_op_params (line 256) | def get_op_params(self):
    method naive_cost_estimates (line 266) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method chunk_text (line 295) | def chunk_text(self, text: str, chunk_size: int) -> list[str]:
    method compute_embedding (line 310) | def compute_embedding(self, text: str) -> tuple[list[float], Generatio...
    method compute_similarity (line 340) | def compute_similarity(self, query_embedding: list[float], chunk_embed...
    method get_chunked_candidate (line 346) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method filter (line 402) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/scan.py
  class ScanPhysicalOp (line 14) | class ScanPhysicalOp(PhysicalOperator, ABC):
    method __init__ (line 21) | def __init__(self, datasource: Any, *args, **kwargs):
    method __str__ (line 25) | def __str__(self):
    method get_id_params (line 30) | def get_id_params(self):
    method get_op_params (line 34) | def get_op_params(self):
    method naive_cost_estimates (line 39) | def naive_cost_estimates(
    method __call__ (line 60) | def __call__(self, idx: int) -> DataRecordSet:
  class MarshalAndScanDataOp (line 95) | class MarshalAndScanDataOp(ScanPhysicalOp):
    method naive_cost_estimates (line 96) | def naive_cost_estimates(
  class ContextScanOp (line 127) | class ContextScanOp(PhysicalOperator):
    method __init__ (line 132) | def __init__(self, context: context.Context, *args, **kwargs):
    method __str__ (line 136) | def __str__(self):
    method get_id_params (line 141) | def get_id_params(self):
    method get_op_params (line 144) | def get_op_params(self):
    method naive_cost_estimates (line 148) | def naive_cost_estimates(
    method __call__ (line 166) | def __call__(self, *args, **kwargs) -> DataRecordSet:

FILE: src/palimpzest/query/operators/search.py
  function make_tool (line 18) | def make_tool(bound_method):
  class SmolAgentsSearch (line 39) | class SmolAgentsSearch(PhysicalOperator):
    method __init__ (line 43) | def __init__(self, context_id: str, search_query: str, *args, **kwargs):
    method __str__ (line 53) | def __str__(self):
    method get_id_params (line 59) | def get_id_params(self):
    method get_op_params (line 67) | def get_op_params(self):
    method naive_cost_estimates (line 75) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method _create_record_set (line 83) | def _create_record_set(
    method __call__ (line 127) | def __call__(self, candidate: DataRecord) -> Any:

FILE: src/palimpzest/query/operators/split.py
  class SplitConvert (line 20) | class SplitConvert(LLMConvert):
    method __init__ (line 21) | def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000,...
    method __str__ (line 32) | def __str__(self):
    method get_id_params (line 38) | def get_id_params(self):
    method get_op_params (line 44) | def get_op_params(self):
    method naive_cost_estimates (line 48) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method get_text_chunks (line 77) | def get_text_chunks(self, text: str, num_chunks: int) -> list[str]:
    method get_chunked_candidate (line 93) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method convert (line 138) | def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo])...
  class SplitFilter (line 170) | class SplitFilter(LLMFilter):
    method __init__ (line 171) | def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000,...
    method __str__ (line 182) | def __str__(self):
    method get_id_params (line 188) | def get_id_params(self):
    method get_op_params (line 194) | def get_op_params(self):
    method naive_cost_estimates (line 198) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method get_text_chunks (line 227) | def get_text_chunks(self, text: str, num_chunks: int) -> list[str]:
    method get_chunked_candidate (line 243) | def get_chunked_candidate(self, candidate: DataRecord, input_fields: l...
    method filter (line 288) | def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], Gene...

FILE: src/palimpzest/query/operators/topk.py
  class Singleton (line 21) | class Singleton:
    method __new__ (line 22) | def __new__(cls, *args, **kw):
  class ClipModel (line 28) | class ClipModel(Singleton):
    method get_model (line 33) | def get_model(cls, model_name: str):
  class TopKOp (line 39) | class TopKOp(PhysicalOperator):
    method __init__ (line 40) | def __init__(
    method __str__ (line 80) | def __str__(self):
    method get_id_params (line 85) | def get_id_params(self):
    method get_op_params (line 97) | def get_op_params(self):
    method naive_cost_estimates (line 110) | def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostE...
    method default_search_func (line 122) | def default_search_func(self, index: Collection, query: list[str] | li...
    method _create_record_set (line 157) | def _create_record_set(
    method __call__ (line 214) | def __call__(self, candidate: DataRecord) -> DataRecordSet:

FILE: src/palimpzest/query/optimizer/cost_model.py
  class BaseCostModel (line 18) | class BaseCostModel:
    method __init__ (line 24) | def __init__(self):
    method get_costed_full_op_ids (line 32) | def get_costed_full_op_ids(self) -> set[str]:
    method __call__ (line 38) | def __call__(self, operator: PhysicalOperator) -> PlanCost:
  class SampleBasedCostModel (line 46) | class SampleBasedCostModel:
    method __init__ (line 49) | def __init__(
    method get_costed_full_op_ids (line 77) | def get_costed_full_op_ids(self):
    method _compute_operator_stats (line 80) | def _compute_operator_stats(self, sentinel_plan_stats: SentinelPlanSta...
    method _compute_naive_plan_cost (line 153) | def _compute_naive_plan_cost(self, operator: PhysicalOperator, source_...
    method __call__ (line 210) | def __call__(self, operator: PhysicalOperator, source_op_estimates: Op...

FILE: src/palimpzest/query/optimizer/optimizer.py
  class Optimizer (line 49) | class Optimizer:
    method __init__ (line 64) | def __init__(
    method update_cost_model (line 164) | def update_cost_model(self, cost_model: BaseCostModel):
    method get_physical_op_params (line 167) | def get_physical_op_params(self):
    method deepcopy_clean (line 176) | def deepcopy_clean(self):
    method update_strategy (line 195) | def update_strategy(self, optimizer_strategy: OptimizationStrategyType):
    method construct_group_tree (line 207) | def construct_group_tree(self, dataset: Dataset) -> tuple[int, dict[st...
    method convert_query_plan_to_group_tree (line 341) | def convert_query_plan_to_group_tree(self, dataset: Dataset) -> str:
    method heuristic_optimization (line 380) | def heuristic_optimization(self, group_id: int) -> None:
    method search_optimization_space (line 386) | def search_optimization_space(self, group_id: int) -> None:
    method optimize (line 415) | def optimize(self, dataset: Dataset) -> list[PhysicalPlan]:

FILE: src/palimpzest/query/optimizer/optimizer_strategy.py
  class OptimizationStrategy (line 13) | class OptimizationStrategy(ABC):
    method get_optimal_plans (line 15) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class GreedyStrategy (line 20) | class GreedyStrategy(OptimizationStrategy):
    method _get_greedy_physical_plan (line 21) | def _get_greedy_physical_plan(self, groups: dict, group_id: int) -> Ph...
    method get_optimal_plans (line 58) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class ParetoStrategy (line 66) | class ParetoStrategy(OptimizationStrategy):
    method _get_candidate_pareto_physical_plans (line 67) | def _get_candidate_pareto_physical_plans(self, groups: dict, group_id:...
    method get_optimal_plans (line 119) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class SentinelStrategy (line 143) | class SentinelStrategy(OptimizationStrategy):
    method _get_sentinel_plan (line 144) | def _get_sentinel_plan(self, groups: dict[str, Group], group_id: int) ...
    method get_optimal_plans (line 172) | def get_optimal_plans(self, groups: dict, final_group_id: int, policy:...
  class NoOptimizationStrategy (line 179) | class NoOptimizationStrategy(GreedyStrategy):

FILE: src/palimpzest/query/optimizer/optimizer_strategy_type.py
  class OptimizationStrategyType (line 11) | class OptimizationStrategyType(Enum):
    method no_transformation (line 21) | def no_transformation(self) -> bool:
    method is_pareto (line 27) | def is_pareto(self) -> bool:
    method is_not_pareto (line 33) | def is_not_pareto(self) -> bool:

FILE: src/palimpzest/query/optimizer/plan.py
  class Plan (line 14) | class Plan(ABC):
    method compute_plan_id (line 16) | def compute_plan_id(self) -> str:
    method __eq__ (line 20) | def __eq__(self, other) -> bool:
    method __hash__ (line 24) | def __hash__(self) -> int:
    method __repr__ (line 28) | def __repr__(self) -> str:
    method __str__ (line 32) | def __str__(self) -> str:
    method __getitem__ (line 36) | def __getitem__(self, slice) -> tuple:
    method __iter__ (line 40) | def __iter__(self) -> iter:
    method __len__ (line 44) | def __len__(self) -> int:
  class PhysicalPlan (line 47) | class PhysicalPlan(Plan):
    method __init__ (line 48) | def __init__(self, operator: PhysicalOperator, subplans: list[Physical...
    method compute_plan_id (line 70) | def compute_plan_id(self) -> str:
    method get_est_total_outputs (line 80) | def get_est_total_outputs(self, num_samples: int | None = None, curren...
    method _compute_next_unique_full_op_map (line 137) | def _compute_next_unique_full_op_map(self, next_map: dict[str, str | N...
    method get_next_unique_full_op_and_id (line 171) | def get_next_unique_full_op_and_id(self, topo_idx: int, operator: Phys...
    method get_next_unique_full_op_id (line 176) | def get_next_unique_full_op_id(self, topo_idx: int, operator: Physical...
    method _compute_upstream_unique_full_op_ids_map (line 182) | def _compute_upstream_unique_full_op_ids_map(self, upstream_map: dict[...
    method get_upstream_unique_full_op_ids (line 206) | def get_upstream_unique_full_op_ids(self, unique_full_op_id: str) -> l...
    method _compute_source_unique_full_op_ids_map (line 210) | def _compute_source_unique_full_op_ids_map(self, source_map: dict[str,...
    method get_source_unique_full_op_ids (line 234) | def get_source_unique_full_op_ids(self, topo_idx: int, operator: Physi...
    method __eq__ (line 239) | def __eq__(self, other):
    method __hash__ (line 242) | def __hash__(self):
    method __repr__ (line 245) | def __repr__(self) -> str:
    method _get_str (line 248) | def _get_str(self, idx: int = 0, indent: int = 0) -> str:
    method __str__ (line 256) | def __str__(self):
    method __getitem__ (line 259) | def __getitem__(self, slice):
    method __iter__ (line 263) | def __iter__(self):
    method __len__ (line 268) | def __len__(self):
    method _from_ops (line 272) | def _from_ops(cls, ops: list[PhysicalOperator], plan_cost: PlanCost | ...
  class SentinelPlan (line 290) | class SentinelPlan(Plan):
    method __init__ (line 291) | def __init__(self, operator_set: list[PhysicalOperator], subplans: lis...
    method compute_plan_id (line 311) | def compute_plan_id(self) -> str:
    method __eq__ (line 321) | def __eq__(self, other):
    method __hash__ (line 324) | def __hash__(self):
    method __repr__ (line 327) | def __repr__(self) -> str:
    method _get_str (line 330) | def _get_str(self, idx: int = 0, indent: int = 0) -> str:
    method __str__ (line 340) | def __str__(self):
    method __getitem__ (line 343) | def __getitem__(self, slice):
    method __iter__ (line 347) | def __iter__(self):
    method __len__ (line 352) | def __len__(self):
    method _compute_next_unique_logical_op_id_map (line 355) | def _compute_next_unique_logical_op_id_map(self, next_map: dict[str, s...
    method get_next_unique_logical_op_id (line 389) | def get_next_unique_logical_op_id(self, unique_logical_op_id: str) -> ...
    method _compute_root_dataset_ids_map (line 393) | def _compute_root_dataset_ids_map(self, root_dataset_ids_map: dict[str...
    method get_root_dataset_ids (line 421) | def get_root_dataset_ids(self, unique_logical_op_id: str) -> list[str]:
    method _compute_source_unique_logical_op_ids_map (line 425) | def _compute_source_unique_logical_op_ids_map(self, source_map: dict[s...
    method get_source_unique_logical_op_ids (line 449) | def get_source_unique_logical_op_ids(self, unique_logical_op_id: str) ...

FILE: src/palimpzest/query/optimizer/primitives.py
  class Expression (line 12) | class Expression:
    method __init__ (line 19) | def __init__(
    method __eq__ (line 49) | def __eq__(self, other):
    method __str__ (line 52) | def __str__(self):
    method __hash__ (line 61) | def __hash__(self):
    method _compute_expr_id (line 67) | def _compute_expr_id(self) -> int:
    method add_applied_rule (line 70) | def add_applied_rule(self, rule: type[rules.Rule]):
    method set_group_id (line 73) | def set_group_id(self, group_id: int) -> None:
  class LogicalExpression (line 77) | class LogicalExpression(Expression):
  class PhysicalExpression (line 81) | class PhysicalExpression(Expression):
    method from_op_and_logical_expr (line 84) | def from_op_and_logical_expr(cls, op: PhysicalOperator, logical_expres...
  class Group (line 96) | class Group:
    method __init__ (line 103) | def __init__(self, logical_expressions: list[LogicalExpression], field...
    method set_explored (line 119) | def set_explored(self):
    method _compute_group_id (line 122) | def _compute_group_id(self) -> int:

FILE: src/palimpzest/query/optimizer/rules.py
  class Rule (line 62) | class Rule:
    method get_rule_id (line 68) | def get_rule_id(cls):
    method matches_pattern (line 72) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 76) | def substitute(cls, logical_expression: LogicalExpression, **kwargs: d...
  class TransformationRule (line 80) | class TransformationRule(Rule):
    method is_exploration_rule (line 88) | def is_exploration_rule(cls) -> bool:
    method substitute (line 93) | def substitute(
  class ReorderConverts (line 108) | class ReorderConverts(TransformationRule):
    method is_exploration_rule (line 114) | def is_exploration_rule(cls) -> bool:
    method matches_pattern (line 118) | def matches_pattern(cls, logical_expression: Expression) -> bool:
    method substitute (line 124) | def substitute(
  class PushDownFilter (line 245) | class PushDownFilter(TransformationRule):
    method matches_pattern (line 252) | def matches_pattern(cls, logical_expression: Expression) -> bool:
    method substitute (line 258) | def substitute(
  class ImplementationRule (line 375) | class ImplementationRule(Rule):
    method _get_image_fields (line 381) | def _get_image_fields(cls, logical_expression: LogicalExpression) -> s...
    method _get_list_image_fields (line 390) | def _get_list_image_fields(cls, logical_expression: LogicalExpression)...
    method _get_audio_fields (line 399) | def _get_audio_fields(cls, logical_expression: LogicalExpression) -> s...
    method _get_list_audio_fields (line 408) | def _get_list_audio_fields(cls, logical_expression: LogicalExpression)...
    method _is_image_only_operation (line 417) | def _is_image_only_operation(cls, logical_expression: LogicalExpressio...
    method _is_image_operation (line 426) | def _is_image_operation(cls, logical_expression: LogicalExpression) ->...
    method _is_audio_only_operation (line 435) | def _is_audio_only_operation(cls, logical_expression: LogicalExpressio...
    method _is_audio_operation (line 444) | def _is_audio_operation(cls, logical_expression: LogicalExpression) ->...
    method _is_text_only_operation (line 453) | def _is_text_only_operation(cls, logical_expression: LogicalExpression...
    method _is_text_operation (line 462) | def _is_text_operation(cls, logical_expression: LogicalExpression) -> ...
    method _is_text_image_multimodal_operation (line 472) | def _is_text_image_multimodal_operation(cls, logical_expression: Logic...
    method _is_text_audio_multimodal_operation (line 477) | def _is_text_audio_multimodal_operation(cls, logical_expression: Logic...
    method _model_matches_input (line 482) | def _model_matches_input(cls, model: Model, logical_expression: Logica...
    method _embedding_model_matches_input (line 525) | def _embedding_model_matches_input(cls, model: Model, logical_expressi...
    method _get_fixed_op_kwargs (line 534) | def _get_fixed_op_kwargs(cls, logical_expression: LogicalExpression, r...
    method _perform_substitution (line 553) | def _perform_substitution(
  class NonLLMConvertRule (line 606) | class NonLLMConvertRule(ImplementationRule):
    method matches_pattern (line 612) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 618) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class LLMConvertBondedRule (line 623) | class LLMConvertBondedRule(ImplementationRule):
    method matches_pattern (line 629) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 635) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class RAGRule (line 655) | class RAGRule(ImplementationRule):
    method matches_pattern (line 664) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 672) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class MixtureOfAgentsRule (line 711) | class MixtureOfAgentsRule(ImplementationRule):
    method matches_pattern (line 720) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 728) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class CritiqueAndRefineRule (line 752) | class CritiqueAndRefineRule(ImplementationRule):
    method matches_pattern (line 758) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 766) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class SplitRule (line 801) | class SplitRule(ImplementationRule):
    method matches_pattern (line 809) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 817) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class TopKRule (line 839) | class TopKRule(ImplementationRule):
    method matches_pattern (line 846) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 852) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class NonLLMFilterRule (line 861) | class NonLLMFilterRule(ImplementationRule):
    method matches_pattern (line 867) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 874) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class LLMFilterRule (line 879) | class LLMFilterRule(ImplementationRule):
    method matches_pattern (line 885) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 892) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class RelationalJoinRule (line 912) | class RelationalJoinRule(ImplementationRule):
    method matches_pattern (line 918) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 924) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class NestedLoopsJoinRule (line 929) | class NestedLoopsJoinRule(ImplementationRule):
    method matches_pattern (line 935) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 941) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class EmbeddingJoinRule (line 963) | class EmbeddingJoinRule(ImplementationRule):
    method matches_pattern (line 969) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 975) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class SemanticAggregateRule (line 1001) | class SemanticAggregateRule(ImplementationRule):
    method matches_pattern (line 1007) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1013) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class AggregateRule (line 1033) | class AggregateRule(ImplementationRule):
    method matches_pattern (line 1039) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1045) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class AddContextsBeforeComputeRule (line 1067) | class AddContextsBeforeComputeRule(ImplementationRule):
    method matches_pattern (line 1077) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1083) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...
  class BasicSubstitutionRule (line 1111) | class BasicSubstitutionRule(ImplementationRule):
    method matches_pattern (line 1129) | def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
    method substitute (line 1136) | def substitute(cls, logical_expression: LogicalExpression, **runtime_k...

FILE: src/palimpzest/query/optimizer/tasks.py
  class Task (line 17) | class Task:
    method perform (line 24) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ...
  class OptimizeGroup (line 33) | class OptimizeGroup(Task):
    method __init__ (line 44) | def __init__(self, group_id: int):
    method perform (line 47) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ...
  class ExploreGroup (line 79) | class ExploreGroup(Task):
    method __init__ (line 84) | def __init__(self, group_id: int):
    method perform (line 87) | def perform(self, groups: dict[int, Group], context: dict[str, Any] | ...
  class OptimizeLogicalExpression (line 119) | class OptimizeLogicalExpression(Task):
    method __init__ (line 127) | def __init__(self, logical_expression: Expression, exploring: bool = F...
    method perform (line 131) | def perform(
  class ApplyRule (line 168) | class ApplyRule(Task):
    method __init__ (line 188) | def __init__(self, rule: type[Rule], logical_expression: Expression, e...
    method perform (line 193) | def perform(
  class OptimizePhysicalExpression (line 277) | class OptimizePhysicalExpression(Task):
    method __init__ (line 287) | def __init__(self, physical_expression: Expression, exploring: bool = ...
    method update_best_physical_expression (line 291) | def update_best_physical_expression(self, group: Group, policy: Policy...
    method _is_dominated (line 324) | def _is_dominated(self, plan_cost: PlanCost, other_plan_cost: PlanCost...
    method _is_pareto_optimal (line 363) | def _is_pareto_optimal(self, expr_plan_cost: PlanCost, pareto_optimal_...
    method update_pareto_optimal_physical_expressions (line 376) | def update_pareto_optimal_physical_expressions(self, group: Group, pol...
    method perform (line 423) | def perform(

FILE: src/palimpzest/query/processor/config.py
  class QueryProcessorConfig (line 10) | class QueryProcessorConfig(BaseModel):
    method to_dict (line 57) | def to_dict(self) -> dict:
    method copy (line 61) | def copy(self) -> QueryProcessorConfig:

FILE: src/palimpzest/query/processor/query_processor.py
  class QueryProcessor (line 18) | class QueryProcessor:
    method __init__ (line 25) | def __init__(
    method execution_id (line 69) | def execution_id(self) -> str:
    method _create_sentinel_plan (line 80) | def _create_sentinel_plan(self, train_dataset: dict[str, Dataset] | No...
    method _execute_best_plan (line 100) | def _execute_best_plan(self, dataset: Dataset, optimizer: Optimizer) -...
    method execute (line 111) | def execute(self) -> DataRecordCollection:

FILE: src/palimpzest/query/processor/query_processor_factory.py
  class QueryProcessorFactory (line 23) | class QueryProcessorFactory:
    method _convert_to_enum (line 26) | def _convert_to_enum(cls, enum_type: type[Enum], value: str) -> Enum:
    method _normalize_strategies (line 34) | def _normalize_strategies(cls, config: QueryProcessorConfig):
    method _normalize_models (line 60) | def _normalize_models(cls, config: QueryProcessorConfig) -> QueryProce...
    method _config_validation_and_normalization (line 109) | def _config_validation_and_normalization(cls, config: QueryProcessorCo...
    method _create_optimizer (line 164) | def _create_optimizer(cls, config: QueryProcessorConfig) -> Optimizer:
    method _create_execution_strategy (line 168) | def _create_execution_strategy(cls, dataset: Dataset, config: QueryPro...
    method _create_sentinel_execution_strategy (line 187) | def _create_sentinel_execution_strategy(cls, config: QueryProcessorCon...
    method create_processor (line 198) | def create_processor(
    method create_and_run_processor (line 238) | def create_and_run_processor(

FILE: src/palimpzest/schemabuilder/schema_builder.py
  class SchemaBuilder (line 21) | class SchemaBuilder:
    method from_file (line 24) | def from_file(cls,
    method from_csv (line 98) | def from_csv(
    method from_jsonld (line 132) | def from_jsonld(
    method from_json (line 176) | def from_json(
    method from_yml (line 201) | def from_yml(

FILE: src/palimpzest/tools/allenpdf.py
  function process_papermage_pdf (line 34) | def process_papermage_pdf(pdf_bytes_docs: list[bytes]):
  function main (line 59) | def main():

FILE: src/palimpzest/tools/pdfparser.py
  function get_md5 (line 17) | def get_md5(file_bytes: bytes) -> str:
  function cosmos_parquet_to_json (line 26) | def cosmos_parquet_to_json(path):
  function cosmos_json_txt (line 99) | def cosmos_json_txt(cosmos_json):
  function cosmos_client (line 111) | def cosmos_client(name: str, data: BinaryIO, output_dir: str, delay=10):
  function get_text_from_pdf (line 191) | def get_text_from_pdf(filename, pdf_bytes, pdfprocessor="pypdf", enable_...

FILE: src/palimpzest/tools/skema_tools.py
  function equations_to_latex (line 10) | def equations_to_latex(image_content):
  function equations_to_latex_base64 (line 19) | def equations_to_latex_base64(image_content):

FILE: src/palimpzest/utils/env_helpers.py
  function load_env (line 5) | def load_env():

FILE: src/palimpzest/utils/hash_helpers.py
  function hash_for_id (line 7) | def hash_for_id(id_str: str, max_chars: int = MAX_ID_CHARS) -> str:
  function hash_for_serialized_dict (line 11) | def hash_for_serialized_dict(dict_obj: dict) -> str:

FILE: src/palimpzest/utils/model_helpers.py
  function get_models (line 8) | def get_models(include_embedding: bool = False, use_vertex: bool = False...
  function get_optimal_models (line 78) | def get_optimal_models(policy: Policy, include_embedding: bool = False, ...
  function use_reasoning_prompt (line 194) | def use_reasoning_prompt(reasoning_effort: str) -> bool:
  function resolve_reasoning_effort (line 202) | def resolve_reasoning_effort(model: Model, reasoning_effort: str) -> str...

FILE: src/palimpzest/utils/model_info_helpers.py
  function _normalize_model_name (line 192) | def _normalize_model_name(name: str) -> str:
  function _extract_version_info (line 197) | def _extract_version_info(name: str) -> tuple[str, str | None, str | None]:
  function fuzzy_match_score (line 233) | def fuzzy_match_score(model_id: str, scores_dict: dict[str, float]) -> f...
  function _extract_model_size (line 297) | def _extract_model_size(model_id: str) -> str | None:
  function derive_model_flags (line 310) | def derive_model_flags(model_id: str) -> dict[str, bool]:
  function _estimate_tps_from_size (line 373) | def _estimate_tps_from_size(model_id: str) -> float | None:
  function predict_local_model_metrics (line 403) | def predict_local_model_metrics(model_id: str) -> dict[str, Any]:
  class ModelMetricsManager (line 458) | class ModelMetricsManager:
    method __new__ (line 464) | def __new__(cls, *args, **kwargs):
    method __init__ (line 469) | def __init__(self):
    method _load_data (line 476) | def _load_data(self):
    method get_model_metrics (line 485) | def get_model_metrics(self, model_name) -> dict[str, Any]:
    method refresh_data (line 489) | def refresh_data(self) -> None:

FILE: src/palimpzest/utils/progress.py
  class ProgressStats (line 32) | class ProgressStats:
  function get_memory_usage (line 42) | def get_memory_usage() -> float:
  class ProgressManager (line 53) | class ProgressManager(ABC):
    method __init__ (line 56) | def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int...
    method get_task_total (line 117) | def get_task_total(self, unique_full_op_id: str) -> int:
    method get_task_description (line 122) | def get_task_description(self, unique_full_op_id: str) -> str:
    method add_task (line 128) | def add_task(self, unique_full_op_id: str, op_str: str, total: int):
    method start (line 133) | def start(self):
    method incr (line 138) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output...
    method finish (line 152) | def finish(self):
  class MockProgressManager (line 157) | class MockProgressManager(ProgressManager):
    method __init__ (line 160) | def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int...
    method add_task (line 163) | def add_task(self, unique_full_op_id: str, op_str: str, total: int):
    method start (line 166) | def start(self):
    method incr (line 169) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output...
    method finish (line 172) | def finish(self):
    method incr_overall_progress_cost (line 175) | def incr_overall_progress_cost(self, cost_delta: float):
  class PZProgressManager (line 178) | class PZProgressManager(ProgressManager):
    method __init__ (line 181) | def __init__(self, plan: PhysicalPlan, num_samples: int | None = None):
    method add_task (line 185) | def add_task(self, unique_full_op_id: str, op_str: str, total: int):
    method start (line 203) | def start(self):
    method incr (line 213) | def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_output...
    method finish (line 265) | def finish(self):
    method update_stats (line 278) | def update_stats(self, unique_full_op_id: str, **kwargs):
  class PZSentinelProgressManager (line 288) | class PZSentinelProgressManager(ProgressManager):
    method __init__ (line 289) | def __init__(self, plan: SentinelPlan, sample_budget: int | None, samp...
    method _is_llm_op (line 358) | def _is_llm_op(self, physical_op: PhysicalOperator) -> bool:
    method get_task_description (line 365) | def get_task_description(self, unique_logical_op_id: str) -> str:
    method add_task (line 370) | def add_task(self, unique_logical_op_id: str, op_str: str, total: int):
    method start (line 388) | def start(self):
    method incr_overall_progress_cost (line 398) | def incr_overall_progress_cost(self, cost_delta: float):
    method incr (line 411) | def incr(self, unique_logical_op_id: str, num_samples: int, display_te...
    method finish (line 455) | def finish(self):
    method update_stats (line 468) | def update_stats(self, unique_logical_op_id: str, **kwargs):
  function create_progress_manager (line 478) | def create_progress_manager(

FILE: src/palimpzest/utils/udfs.py
  function url_to_file (line 14) | def url_to_file(candidate: dict):
  function file_to_xls (line 28) | def file_to_xls(candidate: dict):
  function xls_to_tables (line 34) | def xls_to_tables(candidate: dict):

FILE: src/palimpzest/validator/validator.py
  class Validator (line 25) | class Validator:
    method __init__ (line 33) | def __init__(self, model: Model = Model.o4_MINI):
    method map_score_fn (line 38) | def map_score_fn(self, fields: list[str], input_record: dict, output: ...
    method flat_map_score_fn (line 41) | def flat_map_score_fn(self, fields: list[str], input_record: dict, out...
    method filter_score_fn (line 44) | def filter_score_fn(self, filter_str: str, input_record: dict, output:...
    method join_score_fn (line 47) | def join_score_fn(self, condition: str, left_input_record: dict, right...
    method topk_score_fn (line 50) | def topk_score_fn(self, fields: list[str], input_record: dict, output:...
    method _get_gen_stats_from_completion (line 53) | def _get_gen_stats_from_completion(self, completion, start_time: float...
    method _default_map_score_fn (line 75) | def _default_map_score_fn(self, op: LLMConvert, fields: list[str], inp...
    method _default_flat_map_score_fn (line 111) | def _default_flat_map_score_fn(self, op: LLMConvert, fields: list[str]...
    method _default_filter_score_fn (line 150) | def _default_filter_score_fn(self, op: LLMFilter, filter_str: str, inp...
    method _default_join_score_fn (line 190) | def _default_join_score_fn(self, op: JoinOp, condition: str, left_inpu...
    method _default_topk_score_fn (line 227) | def _default_topk_score_fn(self, op: TopKOp, fields: list[str], input_...
    method _score_map (line 267) | def _score_map(self, op: LLMConvert, fields: list[str], input_record: ...
    method _score_flat_map (line 276) | def _score_flat_map(self, op: LLMConvert, fields: list[str], input_rec...
    method _score_filter (line 285) | def _score_filter(self, op: LLMFilter, filter_str: str, input_record: ...
    method _score_join (line 294) | def _score_join(self, op: JoinOp, condition: str, left_input_record: D...
    method _score_topk (line 303) | def _score_topk(self, op: TopKOp, fields: list[str], input_record: Dat...

FILE: tests/pytest/conftest.py
  function dataset (line 26) | def dataset(request, enron_eval_tiny, real_estate_eval_tiny):
  function workload (line 36) | def workload(
  function policy (line 56) | def policy(request):
  function physical_plan (line 68) | def physical_plan(
  function sentinel_plan (line 92) | def sentinel_plan(
  function execution_data (line 105) | def execution_data(
  function expected_records (line 120) | def expected_records(
  function champion_outputs (line 146) | def champion_outputs(
  function expected_qualities (line 164) | def expected_qualities(
  function side_effect (line 184) | def side_effect(
  function operator_to_stats (line 203) | def operator_to_stats(
  function expected_plan (line 232) | def expected_plan(

FILE: tests/pytest/fixtures/champion_outputs.py
  function scan_convert_filter_champion_outputs (line 10) | def scan_convert_filter_champion_outputs(scan_convert_filter_sentinel_pl...
  function scan_convert_filter_empty_champion_outputs (line 47) | def scan_convert_filter_empty_champion_outputs(scan_convert_filter_senti...
  function scan_convert_filter_varied_champion_outputs (line 84) | def scan_convert_filter_varied_champion_outputs(scan_convert_filter_sent...
  function scan_multi_convert_multi_filter_champion_outputs (line 121) | def scan_multi_convert_multi_filter_champion_outputs(scan_multi_convert_...

FILE: tests/pytest/fixtures/datasets.py
  class RealEstateListingDataset (line 16) | class RealEstateListingDataset(IterDataset):
    method __init__ (line 17) | def __init__(self, listings_dir):
    method __len__ (line 28) | def __len__(self):
    method __getitem__ (line 31) | def __getitem__(self, idx: int):
  class CostModelTestDataset (line 49) | class CostModelTestDataset(IterDataset):
    method __init__ (line 50) | def __init__(self):
    method __len__ (line 54) | def __len__(self):
    method __getitem__ (line 57) | def __getitem__(self, idx: int):
  function project_root (line 66) | def project_root() -> Path:
  function enron_eval_tiny_data_path (line 71) | def enron_eval_tiny_data_path(project_root) -> str:
  function real_estate_eval_tiny_data_path (line 76) | def real_estate_eval_tiny_data_path(project_root) -> str:
  function enron_eval_tiny (line 82) | def enron_eval_tiny(enron_eval_tiny_data_path):
  function real_estate_eval_tiny (line 87) | def real_estate_eval_tiny(real_estate_eval_tiny_data_path):
  function cost_model_test_dataset (line 92) | def cost_model_test_dataset():

FILE: tests/pytest/fixtures/execution_data.py
  function scan_convert_filter_execution_data (line 11) | def scan_convert_filter_execution_data(scan_convert_filter_sentinel_plan...
  function scan_convert_filter_varied_execution_data (line 114) | def scan_convert_filter_varied_execution_data(scan_convert_filter_sentin...
  function scan_multi_convert_multi_filter_execution_data (line 225) | def scan_multi_convert_multi_filter_execution_data(scan_multi_convert_mu...

FILE: tests/pytest/fixtures/expected_physical_plans.py
  function get_three_converts_plan (line 17) | def get_three_converts_plan(three_converts_workload, enron_eval_tiny, em...
  function three_converts_min_cost_expected_plan (line 52) | def three_converts_min_cost_expected_plan(three_converts_workload, enron...
  function three_converts_max_quality_expected_plan (line 71) | def three_converts_max_quality_expected_plan(three_converts_workload, en...
  function three_converts_min_cost_at_fixed_quality_expected_plan (line 90) | def three_converts_min_cost_at_fixed_quality_expected_plan(three_convert...
  function three_converts_max_quality_at_fixed_cost_expected_plan (line 109) | def three_converts_max_quality_at_fixed_cost_expected_plan(three_convert...
  function get_one_filter_one_convert_plan (line 128) | def get_one_filter_one_convert_plan(one_filter_one_convert_workload, enr...
  function one_filter_one_convert_min_cost_expected_plan (line 163) | def one_filter_one_convert_min_cost_expected_plan(one_filter_one_convert...
  function get_two_converts_two_filters_plan (line 186) | def get_two_converts_two_filters_plan(two_converts_two_filters_workload,...
  function two_converts_two_filters_min_cost_expected_plan (line 228) | def two_converts_two_filters_min_cost_expected_plan(two_converts_two_fil...
  function two_converts_two_filters_max_quality_expected_plan (line 257) | def two_converts_two_filters_max_quality_expected_plan(two_converts_two_...
  function two_converts_two_filters_min_cost_at_fixed_quality_expected_plan (line 286) | def two_converts_two_filters_min_cost_at_fixed_quality_expected_plan(two...
  function two_converts_two_filters_max_quality_at_fixed_cost_expected_plan (line 315) | def two_converts_two_filters_max_quality_at_fixed_cost_expected_plan(two...

FILE: tests/pytest/fixtures/expected_qualities.py
  function scan_convert_filter_qualities (line 8) | def scan_convert_filter_qualities(scan_convert_filter_execution_data):
  function scan_convert_filter_empty_qualities (line 19) | def scan_convert_filter_empty_qualities(scan_convert_filter_execution_da...
  function scan_convert_filter_varied_qualities (line 44) | def scan_convert_filter_varied_qualities(scan_convert_filter_varied_exec...
  function scan_convert_filter_varied_override_qualities (line 76) | def scan_convert_filter_varied_override_qualities(scan_convert_filter_va...
  function scan_multi_convert_multi_filter_qualities (line 135) | def scan_multi_convert_multi_filter_qualities(scan_multi_convert_multi_f...

FILE: tests/pytest/fixtures/expected_records.py
  function enron_all_expected_records (line 12) | def enron_all_expected_records(enron_eval_tiny_data_path):
  function enron_filter_expected_records (line 25) | def enron_filter_expected_records(enron_all_expected_records):
  function real_estate_all_expected_records (line 35) | def real_estate_all_expected_records(real_estate_eval_tiny_data_path, im...
  function real_estate_one_to_many_expected_records (line 58) | def real_estate_one_to_many_expected_records(real_estate_eval_tiny_data_...
  function scan_convert_filter_expected_outputs (line 84) | def scan_convert_filter_expected_outputs(foobar_schema):
  function scan_convert_filter_empty_expected_outputs (line 102) | def scan_convert_filter_empty_expected_outputs():
  function scan_convert_filter_varied_expected_outputs (line 106) | def scan_convert_filter_varied_expected_outputs(foobar_schema):
  function scan_multi_convert_multi_filter_expected_outputs (line 127) | def scan_multi_convert_multi_filter_expected_outputs(foobar_schema, baz_...

FILE: tests/pytest/fixtures/models.py
  function embedding_text_only_model (line 9) | def embedding_text_only_model():

FILE: tests/pytest/fixtures/operator_to_stats.py
  function get_three_converts_logical_and_full_op_ids (line 15) | def get_three_converts_logical_and_full_op_ids(three_converts_workload, ...
  function three_converts_min_cost_operator_to_stats (line 71) | def three_converts_min_cost_operator_to_stats(three_converts_workload, e...
  function three_converts_max_quality_operator_to_stats (line 100) | def three_converts_max_quality_operator_to_stats(three_converts_workload...
  function three_converts_min_cost_at_fixed_quality_operator_to_stats (line 129) | def three_converts_min_cost_at_fixed_quality_operator_to_stats(three_con...
  function three_converts_max_quality_at_fixed_cost_operator_to_stats (line 158) | def three_converts_max_quality_at_fixed_cost_operator_to_stats(three_con...
  function get_one_filter_one_convert_logical_and_full_op_ids (line 190) | def get_one_filter_one_convert_logical_and_full_op_ids(one_filter_one_co...
  function one_filter_one_convert_min_cost_operator_to_stats (line 240) | def one_filter_one_convert_min_cost_operator_to_stats(one_filter_one_con...
  function get_two_converts_two_filters_logical_and_full_op_ids (line 264) | def get_two_converts_two_filters_logical_and_full_op_ids(two_converts_tw...
  function two_converts_two_filters_min_cost_operator_to_stats (line 332) | def two_converts_two_filters_min_cost_operator_to_stats(two_converts_two...
  function two_converts_two_filters_max_quality_operator_to_stats (line 366) | def two_converts_two_filters_max_quality_operator_to_stats(two_converts_...
  function two_converts_two_filters_min_cost_at_fixed_quality_operator_to_stats (line 400) | def two_converts_two_filters_min_cost_at_fixed_quality_operator_to_stats...
  function two_converts_two_filters_max_quality_at_fixed_cost_operator_to_stats (line 434) | def two_converts_two_filters_max_quality_at_fixed_cost_operator_to_stats...

FILE: tests/pytest/fixtures/physical_plans.py
  function scan_only_plan (line 16) | def scan_only_plan(enron_eval_tiny):
  function non_llm_filter_plan (line 23) | def non_llm_filter_plan(enron_eval_tiny):
  function llm_filter_plan (line 36) | def llm_filter_plan(enron_eval_tiny):
  function bonded_llm_convert_plan (line 51) | def bonded_llm_convert_plan(email_schema, enron_eval_tiny):
  function rag_convert_plan (line 64) | def rag_convert_plan(email_schema, enron_eval_tiny, embedding_text_only_...
  function image_convert_plan (line 80) | def image_convert_plan(real_estate_listing_files_schema, image_real_esta...
  function one_to_many_convert_plan (line 93) | def one_to_many_convert_plan(real_estate_listing_files_schema, room_real...
  function scan_convert_filter_sentinel_plan (line 107) | def scan_convert_filter_sentinel_plan(foobar_schema):
  function scan_multi_convert_multi_filter_sentinel_plan (line 134) | def scan_multi_convert_multi_filter_sentinel_plan(foobar_schema, baz_sch...

FILE: tests/pytest/fixtures/schemas.py
  function email_schema (line 11) | def email_schema():
  function real_estate_listing_files_schema (line 21) | def real_estate_listing_files_schema():
  function text_real_estate_listing_schema (line 32) | def text_real_estate_listing_schema(real_estate_listing_files_schema):
  function image_real_estate_listing_schema (line 42) | def image_real_estate_listing_schema(real_estate_listing_files_schema):
  function room_real_estate_listing_schema (line 57) | def room_real_estate_listing_schema(real_estate_listing_files_schema):
  function case_data_schema (line 69) | def case_data_schema():
  function foobar_schema (line 100) | def foobar_schema():
  function baz_schema (line 108) | def baz_schema():

FILE: tests/pytest/fixtures/side_effects.py
  function enron_filter (line 8) | def enron_filter():
  function enron_convert (line 20) | def enron_convert(email_schema):
  function real_estate_convert (line 52) | def real_estate_convert(image_real_estate_listing_schema):
  function real_estate_one_to_many_convert (line 70) | def real_estate_one_to_many_convert(room_real_estate_listing_schema):

FILE: tests/pytest/fixtures/workloads.py
  function within_two_miles_of_mit (line 5) | def within_two_miles_of_mit(record):
  function in_price_range (line 26) | def in_price_range(record):
  function enron_workload (line 39) | def enron_workload(enron_eval_tiny, email_schema):
  function small_real_estate_workload (line 52) | def small_real_estate_workload(
  function real_estate_workload (line 68) | def real_estate_workload(
  function three_converts_workload (line 92) | def three_converts_workload(enron_eval_tiny, email_schema, foobar_schema...
  function one_filter_one_convert_workload (line 102) | def one_filter_one_convert_workload(enron_eval_tiny, email_schema):
  function two_converts_two_filters_workload (line 111) | def two_converts_two_filters_workload(enron_eval_tiny, email_schema, foo...

FILE: tests/pytest/test_aggregate.py
  class TextInputSchema (line 21) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 25) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 29) | class AudioInputSchema(BaseModel):
  class OutputSchema (line 38) | class OutputSchema(BaseModel):
  function create_input_record (line 41) | def create_input_record(input_schema: type[BaseModel], idx: int) -> Data...
  function mock_generator_call (line 59) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function test_aggregate (line 77) | def test_aggregate(mocker, input_schema, physical_op_class):

FILE: tests/pytest/test_convert.py
  function test_convert (line 27) | def test_convert(mocker, convert_op, side_effect, email_schema, enron_ev...

FILE: tests/pytest/test_dataset.py
  function sample_df (line 11) | def sample_df():
  function test_dataset_initialization (line 19) | def test_dataset_initialization(sample_df):
  function test_dataset_filter (line 25) | def test_dataset_filter(sample_df):
  function test_dataset_add_columns (line 39) | def test_dataset_add_columns(sample_df):

FILE: tests/pytest/test_distinct.py
  function sample_df (line 20) | def sample_df():
  function test_distinct (line 29) | def test_distinct(sample_df, execution_strategy):
  function test_dataset_with_distinct_cols (line 39) | def test_dataset_with_distinct_cols(sample_df, execution_strategy):
  function test_dataset_with_distinct_cols_and_limit (line 49) | def test_dataset_with_distinct_cols_and_limit(sample_df, execution_strat...
  function test_dataset_with_distinct_cols_and_filter (line 59) | def test_dataset_with_distinct_cols_and_filter(sample_df, execution_stra...

FILE: tests/pytest/test_dynamic_models.py
  function input_schema (line 48) | def input_schema():
  function output_schema (line 56) | def output_schema():
  function sample_record (line 64) | def sample_record(input_schema):
  function mock_litellm_response (line 70) | def mock_litellm_response():
  class TestModelInstantiation (line 85) | class TestModelInstantiation:
    method test_known_model_instantiation (line 88) | def test_known_model_instantiation(self):
    method test_model_instantiation_with_string (line 94) | def test_model_instantiation_with_string(self):
    method test_unknown_model_raises_error (line 101) | def test_unknown_model_raises_error(self):
    method test_model_properties_from_specs (line 106) | def test_model_properties_from_specs(self):
    method test_model_provider_property (line 115) | def test_model_provider_property(self):
    method test_model_api_base_parameter (line 123) | def test_model_api_base_parameter(self):
  class TestModelRegistry (line 135) | class TestModelRegistry:
    method test_models_registered_on_creation (line 138) | def test_models_registered_on_creation(self):
    method test_get_all_models_returns_list (line 148) | def test_get_all_models_returns_list(self):
    method test_registry_contains_expected_models (line 154) | def test_registry_contains_expected_models(self):
  class TestModelEqualityAndHashing (line 172) | class TestModelEqualityAndHashing:
    method test_model_equality_same_instance (line 175) | def test_model_equality_same_instance(self):
    method test_model_equality_same_value (line 180) | def test_model_equality_same_value(self):
    method test_model_equality_with_string (line 186) | def test_model_equality_with_string(self):
    method test_model_inequality (line 191) | def test_model_inequality(self):
    method test_model_hash_consistency (line 195) | def test_model_hash_consistency(self):
    method test_model_usable_in_set (line 201) | def test_model_usable_in_set(self):
    method test_model_usable_as_dict_key (line 206) | def test_model_usable_as_dict_key(self):
    method test_model_str_repr (line 211) | def test_model_str_repr(self):
    method test_model_lt_comparison (line 217) | def test_model_lt_comparison(self):
  class TestModelHelperFunctions (line 229) | class TestModelHelperFunctions:
    method test_get_models_with_openai_key (line 232) | def test_get_models_with_openai_key(self):
    method test_get_models_excludes_embedding_by_default (line 239) | def test_get_models_excludes_embedding_by_default(self):
    method test_get_models_includes_embedding_when_requested (line 246) | def test_get_models_includes_embedding_when_requested(self):
    method test_get_models_empty_without_keys (line 253) | def test_get_models_empty_without_keys(self):
    method test_get_optimal_models_returns_top_models (line 264) | def test_get_optimal_models_returns_top_models(self):
    method test_get_optimal_models_respects_policy (line 270) | def test_get_optimal_models_respects_policy(self):
    method test_get_optimal_models_never_returns_empty_with_available_models (line 283) | def test_get_optimal_models_never_returns_empty_with_available_models(...
    method test_get_optimal_models_fallback_returns_best_by_primary_metric (line 293) | def test_get_optimal_models_fallback_returns_best_by_primary_metric(se...
    method test_get_optimal_models_fallback_with_time_policy (line 308) | def test_get_optimal_models_fallback_with_time_policy(self):
  class TestGeneratorIntegration (line 323) | class TestGeneratorIntegration:
    method test_generator_uses_model_value (line 327) | def test_generator_uses_model_value(
    method test_generator_with_different_providers (line 354) | def test_generator_with_different_providers(
  class TestQueryProcessorIntegration (line 384) | class TestQueryProcessorIntegration:
    method test_factory_accepts_model_list (line 388) | def test_factory_accepts_model_list(self, mock_processor_cls):
    method test_factory_auto_selects_models_when_none_provided (line 409) | def test_factory_auto_selects_models_when_none_provided(self):
  class TestEndToEndIntegration (line 439) | class TestEndToEndIntegration:
    method test_simple_sem_map_pipeline (line 446) | def test_simple_sem_map_pipeline(self):
    method test_pipeline_with_filter (line 490) | def test_pipeline_with_filter(self):
    method test_pipeline_with_auto_model_selection (line 526) | def test_pipeline_with_auto_model_selection(self):
  class TestVLLMModelSupport (line 555) | class TestVLLMModelSupport:
    method test_vllm_model_creation_with_api_base (line 560) | def test_vllm_model_creation_with_api_base(self):
    method test_vllm_model_stores_extra_kwargs (line 566) | def test_vllm_model_stores_extra_kwargs(self):
    method test_vllm_model_without_api_base_raises (line 571) | def test_vllm_model_without_api_base_raises(self):
    method test_vllm_model_cost_is_zero (line 578) | def test_vllm_model_cost_is_zero(self):
    method test_predict_local_model_metrics_known_model (line 590) | def test_predict_local_model_metrics_known_model(self):
    method test_predict_local_model_metrics_unknown_model (line 596) | def test_predict_local_model_metrics_unknown_model(self):
    method test_vllm_model_has_quality_score (line 602) | def test_vllm_model_has_quality_score(self):
    method test_vllm_model_has_latency (line 608) | def test_vllm_model_has_latency(self):
    method test_vllm_model_unknown_gets_defaults (line 614) | def test_vllm_model_unknown_gets_defaults(self):
    method test_fuzzy_match_exact_substring (line 622) | def test_fuzzy_match_exact_substring(self):
    method test_fuzzy_match_normalized (line 627) | def test_fuzzy_match_normalized(self):
    method test_fuzzy_match_no_match_returns_none (line 632) | def test_fuzzy_match_no_match_returns_none(self):
    method test_derive_model_flags_llama (line 639) | def test_derive_model_flags_llama(self):
    method test_derive_model_flags_non_llama (line 644) | def test_derive_model_flags_non_llama(self):
    method test_derive_model_flags_clip (line 649) | def test_derive_model_flags_clip(self):
    method test_derive_model_flags_gpt5 (line 654) | def test_derive_model_flags_gpt5(self):
    method test_derive_model_flags_o_model (line 659) | def test_derive_model_flags_o_model(self):
    method test_vllm_model_is_vllm (line 666) | def test_vllm_model_is_vllm(self):
    method test_vllm_llama_model_is_llama (line 671) | def test_vllm_llama_model_is_llama(self):
    method test_vllm_non_llama_is_not_llama (line 676) | def test_vllm_non_llama_is_not_llama(self):
    method test_vllm_model_defaults (line 683) | def test_vllm_model_defaults(self):
    method test_factory_rejects_multiple_vllm_models (line 691) | def test_factory_rejects_multiple_vllm_models(self):
    method test_generator_passes_vllm_kwargs (line 710) | def test_generator_passes_vllm_kwargs(self, mock_completion, sample_re...

FILE: tests/pytest/test_dynamicschema.py
  function test_dynamicschema_jsonld (line 15) | def test_dynamicschema_jsonld(project_root: Path):
  function test_dynamicschema_csv (line 20) | def test_dynamicschema_csv(project_root: Path):
  function test_dynamicschema_json (line 26) | def test_dynamicschema_json(mocker, enron_workload, enron_convert, enron...
  function test_dynamicschema_yml (line 55) | def test_dynamicschema_yml(mocker, enron_workload, enron_convert, enron_...

FILE: tests/pytest/test_execution.py
  class TestExecution (line 19) | class TestExecution:
    method test_execute_full_plan (line 54) | def test_execute_full_plan(self, mocker, execution_strategy, dataset, ...

FILE: tests/pytest/test_filter.py
  class TextInputSchema (line 26) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 30) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 34) | class AudioInputSchema(BaseModel):
  function mock_generator_call (line 43) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function test_filter (line 61) | def test_filter(mocker, input_schema, physical_op_class, embedding_text_...

FILE: tests/pytest/test_generator.py
  function generate_session_id (line 15) | def generate_session_id() -> str:
  function question (line 24) | def question():
  function output_schema (line 31) | def output_schema():
  function test_generator (line 46) | def test_generator(model, question, output_schema):
  function test_vllm_generator (line 55) | def test_vllm_generator(question, output_schema):
  class TextInputSchema (line 154) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 160) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 166) | class AudioInputSchema(BaseModel):
  class AnimalOutputSchema (line 179) | class AnimalOutputSchema(BaseModel):
  function create_input_record (line 468) | def create_input_record(input_schema, modality: str):
  function get_model_for_provider (line 496) | def get_model_for_provider(provider: str) -> Model:
  function get_input_schema_for_modality (line 514) | def get_input_schema_for_modality(modality: str):
  function check_api_key (line 570) | def check_api_key(provider: str) -> bool:
  function is_modality_supported (line 579) | def is_modality_supported(provider: str, modality: str) -> bool:
  function within_tolerance (line 584) | def within_tolerance(actual: int, expected: int, tolerance: float = 0.05...
  function assert_stats_match (line 592) | def assert_stats_match(gen_stats, expected: dict, request_name: str, pro...
  function test_generator_stats (line 675) | def test_generator_stats(provider, modality):

FILE: tests/pytest/test_iter_dataset.py
  function temp_text_file (line 17) | def temp_text_file():
  function temp_text_dir (line 25) | def temp_text_dir():
  function list_values (line 38) | def list_values():
  function df_values (line 42) | def df_values():
  function test_text_dataset (line 46) | def test_text_dataset(temp_text_dir):
  function test_memory_dataset_list (line 58) | def test_memory_dataset_list(list_values):
  function test_memory_dataset_df (line 69) | def test_memory_dataset_df(df_values):
  function test_memory_dataset_copy (line 81) | def test_memory_dataset_copy():
  function temp_html_dir (line 89) | def temp_html_dir(tmp_path):
  function test_html_dataset (line 103) | def test_html_dataset(temp_html_dir):
  function test_invalid_directory (line 113) | def test_invalid_directory():

FILE: tests/pytest/test_join.py
  class TextInputSchema (line 21) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 25) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 29) | class AudioInputSchema(BaseModel):
  function create_input_record (line 38) | def create_input_record(schema: type[BaseModel]) -> DataRecord:
  function mock_generator_call (line 53) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function embedding_join_mock_generator_call (line 60) | def embedding_join_mock_generator_call(candidate, fields, right_candidat...
  function test_join (line 82) | def test_join(mocker, left_input_schema, right_input_schema, physical_op...
  function test_embedding_join (line 134) | def test_embedding_join(mocker, embedding_text_only_model):

FILE: tests/pytest/test_map.py
  class TextInputSchema (line 25) | class TextInputSchema(BaseModel):
  class ImageInputSchema (line 29) | class ImageInputSchema(BaseModel):
  class AudioInputSchema (line 33) | class AudioInputSchema(BaseModel):
  class OutputSchema (line 42) | class OutputSchema(BaseModel):
  function mock_generator_call (line 45) | def mock_generator_call(candidate, fields, right_candidate=None, json_ou...
  function test_map (line 63) | def test_map(mocker, input_schema, physical_op_class, embedding_text_onl...

FILE: tests/pytest/test_optimizer.py
  class TestPrimitives (line 22) | class TestPrimitives:
    method test_group_id_equality (line 23) | def test_group_id_equality(self, email_schema):
  class TestOptimizer (line 105) | class TestOptimizer:
    method test_basic_functionality (line 106) | def test_basic_functionality(self, enron_eval_tiny, opt_strategy):
    method test_simple_max_quality_convert (line 123) | def test_simple_max_quality_convert(self, enron_eval_tiny, email_schem...
    method test_simple_min_cost_convert (line 147) | def test_simple_min_cost_convert(self, enron_eval_tiny, email_schema, ...
    method test_simple_min_time_convert (line 166) | def test_simple_min_time_convert(self, enron_eval_tiny, email_schema, ...
    method test_simple_vllm_convert (line 185) | def test_simple_vllm_convert(self, enron_eval_tiny, email_schema, opt_...
    method test_push_down_filter (line 205) | def test_push_down_filter(self, enron_eval_tiny, email_schema, opt_str...
    method test_push_down_two_filters (line 226) | def test_push_down_two_filters(self, enron_eval_tiny, email_schema, op...
    method test_small_real_estate_logical_reorder (line 249) | def test_small_real_estate_logical_reorder(self, small_real_estate_wor...
    method test_real_estate_logical_reorder (line 272) | def test_real_estate_logical_reorder(self, real_estate_workload, opt_s...
    method test_seven_filters (line 297) | def test_seven_filters(self, enron_eval_tiny, email_schema, opt_strate...
  class MockSampleBasedCostModel (line 342) | class MockSampleBasedCostModel:
    method __init__ (line 345) | def __init__(self, operator_to_stats):
    method get_costed_full_op_ids (line 358) | def get_costed_full_op_ids(self):
    method __call__ (line 361) | def __call__(
  class TestParetoOptimizer (line 451) | class TestParetoOptimizer:
    method test_pareto_optimization_strategy (line 452) | def test_pareto_optimization_strategy(self, workload, policy, operator...

FILE: tests/pytest/test_physical.py
  class SimpleSchema (line 15) | class SimpleSchema(BaseModel):
  class SimpleSchemaTwo (line 19) | class SimpleSchemaTwo(BaseModel):
  function test_physical_operator_init (line 24) | def test_physical_operator_init():
  function test_physical_operator_equality (line 41) | def test_physical_operator_equality():
  function test_physical_operator_str (line 52) | def test_physical_operator_str():
  function test_physical_operator_id_generation (line 64) | def test_physical_operator_id_generation():
  function test_physical_operator_copy (line 82) | def test_physical_operator_copy():

FILE: tests/pytest/test_records.py
  class TestSchema (line 11) | class TestSchema(BaseModel):
  class TestDataRecord (line 16) | class TestDataRecord:
    method sample_record (line 18) | def sample_record(self):
    method sample_df (line 24) | def sample_df(self):
    method test_create_record (line 31) | def test_create_record(self, sample_record):
    method test_record_equality (line 37) | def test_record_equality(self, sample_record):
    method test_to_df (line 42) | def test_to_df(self, sample_df):
    method test_to_df_with_project_cols (line 51) | def test_to_df_with_project_cols(self, sample_df):
    method test_invalid_attribute (line 60) | def test_invalid_attribute(self, sample_record):
    method test_to_dict (line 65) | def test_to_dict(self, sample_record):
    method test_to_json_str (line 71) | def test_to_json_str(self, sample_record):

FILE: tests/pytest/test_rules.py
  function schema (line 12) | def schema():
  function base_scan_op (line 19) | def base_scan_op(schema):
  function test_substitute_methods (line 25) | def test_substitute_methods(base_scan_op):

FILE: tests/pytest/test_scan.py
  class List (line 9) | class List(BaseModel):
  function test_marshal_and_scan_memory_source (line 13) | def test_marshal_and_scan_memory_source():

FILE: tests/pytest/test_schemas.py
  class Dog (line 15) | class Dog(BaseModel):
  class Cat (line 19) | class Cat(BaseModel):
  function test_schema_equality (line 23) | def test_schema_equality():
  function test_get_schema_field_names (line 27) | def test_get_schema_field_names():
  function test_project_schema (line 31) | def test_project_schema():
  function test_create_schema_from_fields (line 40) | def test_create_schema_from_fields():
  function test_create_schema_from_df (line 51) | def test_create_schema_from_df():
  function test_union_schemas (line 67) | def test_union_schemas():

FILE: website/src/components/HomepageFeatures/index.tsx
  type FeatureItem (line 6) | type FeatureItem = {
  function Feature (line 46) | function Feature({title, Svg, description}: FeatureItem) {
  function HomepageFeatures (line 60) | function HomepageFeatures(): ReactNode {

FILE: website/src/components/ResearchPage/admonitions.tsx
  type AbstractProps (line 3) | interface AbstractProps {
  function Abstract (line 9) | function Abstract({ children }: AbstractProps) {

FILE: website/src/pages/index.tsx
  function HomepageHeader (line 11) | function HomepageHeader() {
  function Home (line 32) | function Home(): ReactNode {

Download .json

Condensed preview — 250 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,043K chars).

[
  {
    "path": ".github/workflows/ci.yaml",
    "chars": 2376,
    "preview": "name: PZ Merge Checks\n\non:\n  pull_request:\n    branches:\n      - main\n\njobs:\n  test:\n    runs-on: ubuntu-latest\n    step"
  },
  {
    "path": ".github/workflows/docs.yaml",
    "chars": 1327,
    "preview": "name: Deploy Docs to GitHub Pages\n\non:\n  push:\n    branches:\n      - main\n\npermissions:\n  contents: write\n\njobs:\n  build"
  },
  {
    "path": ".github/workflows/package.yaml",
    "chars": 2386,
    "preview": "name: package\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n    branches:\n      - main\n\njobs:\n  build:\n    run"
  },
  {
    "path": ".github/workflows/test-docs.yaml",
    "chars": 571,
    "preview": "name: Test Building Docs\n\non:\n  pull_request:\n    branches:\n      - main\n\njobs:\n  test-deploy:\n    name: Test deployment"
  },
  {
    "path": ".gitignore",
    "chars": 980,
    "preview": "docs/site/\n*.zip\n.cache/\n.env\nbuild/*\ndocs/build/*\ndocs/source/generated/*\ndist/*\n.vscode/*\n.idea/*\n.chroma\n.chroma-biod"
  },
  {
    "path": "LICENSE",
    "chars": 1079,
    "preview": "MIT License\n\nCopyright (c) 2024 MIT Data Systems Group\n\nPermission is hereby granted, free of charge, to any person obta"
  },
  {
    "path": "README.md",
    "chars": 3495,
    "preview": "![pz-banner](https://palimpzest-workloads.s3.us-east-1.amazonaws.com/palimpzest-cropped.png)\n\n# Palimpzest (PZ)\n[![Disco"
  },
  {
    "path": "abacus-research/README.md",
    "chars": 1306,
    "preview": "## Chroma Embeddings and MMQA files\nYou can download the chroma embeddings we computed for MMQA and BioDEX by executing "
  },
  {
    "path": "abacus-research/README_CUAD_LOCAL.md",
    "chars": 1096,
    "preview": "# CUAD Local Data Setup and Usage\n\n## Setup\n\nSince HuggingFace datasets no longer supports loading scripts, we've create"
  },
  {
    "path": "abacus-research/biodex-ablation.py",
    "chars": 16893,
    "preview": "import argparse\nimport json\nimport os\nimport time\n\nimport chromadb\nimport datasets\nfrom chromadb.utils.embedding_functio"
  },
  {
    "path": "abacus-research/biodex-demo.py",
    "chars": 18711,
    "preview": "import argparse\nimport json\nimport os\nimport time\n\nimport chromadb\nimport datasets\nfrom chromadb.utils.embedding_functio"
  },
  {
    "path": "abacus-research/biodex-max-quality-at-cost.py",
    "chars": 18204,
    "preview": "import argparse\nimport json\nimport os\nimport time\n\nimport chromadb\nimport datasets\nfrom chromadb.utils.embedding_functio"
  },
  {
    "path": "abacus-research/biodex-min-at-fixed-quality.py",
    "chars": 17946,
    "preview": "import argparse\nimport json\nimport os\nimport time\n\nimport chromadb\nimport datasets\nfrom chromadb.utils.embedding_functio"
  },
  {
    "path": "abacus-research/biodex-pareto-cascades.py",
    "chars": 18173,
    "preview": "import argparse\nimport json\nimport os\nimport time\n\nimport chromadb\nimport datasets\nfrom chromadb.utils.embedding_functio"
  },
  {
    "path": "abacus-research/biodex-priors-cascades.json",
    "chars": 98813,
    "preview": "{\"0005c18b69\": {\"quality\": 0.19444444444444442, \"cost\": 0.0038703809999999996, \"time\": 61.16110005378724}, \"009df798a3\":"
  },
  {
    "path": "abacus-research/biodex-priors.json",
    "chars": 99314,
    "preview": "{\"00c93aec22\": {\"quality\": 0.06301075268817204, \"cost\": 0.011813586, \"time\": 87.30672872066498}, \"00f4acd0d3\": {\"quality"
  },
  {
    "path": "abacus-research/biodex-revision-priors-maxquality.json",
    "chars": 27368,
    "preview": "{\"00b02360ef\": {\"quality\": 0.21648684648684646, \"cost\": 0.040317356500000005, \"time\": 40.99749290347099}, \"025a41642e\": "
  },
  {
    "path": "abacus-research/biodex-revision-priors-mincost.json",
    "chars": 27556,
    "preview": "{\"00b02360ef\": {\"quality\": 0.25047369297369293, \"cost\": 0.038505449000000004, \"time\": 33.970101940631864}, \"025a41642e\":"
  },
  {
    "path": "abacus-research/cheap-priors-cascades.json",
    "chars": 83561,
    "preview": "{\"0005c18b69\": {\"quality\": 0.5422666666666667, \"cost\": 1.62e-06, \"time\": 0.026}, \"009df798a3\": {\"quality\": 0.470225, \"co"
  },
  {
    "path": "abacus-research/cheap-priors.json",
    "chars": 83576,
    "preview": "{\"00c93aec22\": {\"quality\": 0.426725, \"cost\": 3.1199999999999998e-06, \"time\": 0.030900000000000004}, \"00f4acd0d3\": {\"qual"
  },
  {
    "path": "abacus-research/cuad-demo.py",
    "chars": 34286,
    "preview": "import argparse\nimport json\nimport os\nimport string\n\nimport numpy as np\nimport pandas as pd\nfrom cuad_data_loader import"
  },
  {
    "path": "abacus-research/cuad-max-quality-at-cost.py",
    "chars": 32984,
    "preview": "import argparse\nimport json\nimport os\nimport string\n\nimport numpy as np\nimport pandas as pd\nfrom cuad_data_loader import"
  },
  {
    "path": "abacus-research/cuad-priors.json",
    "chars": 100961,
    "preview": "{\"00c93aec22\": {\"quality\": 0.5304878048780488, \"cost\": 0.01609626, \"time\": 121.91315126419067}, \"00f4acd0d3\": {\"quality\""
  },
  {
    "path": "abacus-research/cuad_data_loader.py",
    "chars": 2719,
    "preview": "\"\"\"\nShared CUAD data loading utilities to replace HuggingFace datasets.\nAll CUAD scripts should import from this module."
  },
  {
    "path": "abacus-research/download_embeddings_and_mmqa.sh",
    "chars": 126,
    "preview": "#!/bin/bash\n\nwget -nc https://palimpzest-workloads.s3.us-east-1.amazonaws.com/abacus-data.tar.gz\ntar -xzf abacus-data.ta"
  },
  {
    "path": "abacus-research/helper-scripts/biodex-gen-index.py",
    "chars": 2724,
    "preview": "import os\nimport time\n\nimport chromadb\nimport chromadb.utils.embedding_functions as embedding_functions\nimport numpy as "
  },
  {
    "path": "abacus-research/helper-scripts/generate-prior-stats-biodex-first-convert.py",
    "chars": 7407,
    "preview": "\"\"\"\nNOTE: this script worked with the tag `abacus-paper-experiments` but is no longer compatible with the main branch.\n\""
  },
  {
    "path": "abacus-research/helper-scripts/generate-prior-stats-biodex.py",
    "chars": 10980,
    "preview": "\"\"\"\nNOTE: this script worked with the tag `abacus-paper-experiments` but is no longer compatible with the main branch.\n\""
  },
  {
    "path": "abacus-research/helper-scripts/generate-prior-stats-cuad.py",
    "chars": 25722,
    "preview": "\"\"\"\nNOTE: this script worked with the tag `abacus-paper-experiments` but is no longer compatible with the main branch.\n\""
  },
  {
    "path": "abacus-research/helper-scripts/mmqa-baseline.py",
    "chars": 4776,
    "preview": "import argparse\nimport json\nimport os\nimport string\nimport time\n\nimport numpy as np\nfrom openai import OpenAI\n\nfrom pali"
  },
  {
    "path": "abacus-research/helper-scripts/mmqa-gen-image-index.py",
    "chars": 3634,
    "preview": "import json\nimport os\n\nimport chromadb\nimport chromadb.utils.embedding_functions as embedding_functions\nimport numpy as "
  },
  {
    "path": "abacus-research/helper-scripts/mmqa-gen-image-title-index.py",
    "chars": 3365,
    "preview": "import json\nimport os\nimport time\n\nimport chromadb\nimport chromadb.utils.embedding_functions as embedding_functions\nimpo"
  },
  {
    "path": "abacus-research/helper-scripts/mmqa-gen-table-index.py",
    "chars": 3778,
    "preview": "import json\nimport os\nimport time\n\nimport chromadb\nimport chromadb.utils.embedding_functions as embedding_functions\nimpo"
  },
  {
    "path": "abacus-research/helper-scripts/mmqa-gen-text-index.py",
    "chars": 2814,
    "preview": "import json\nimport os\nimport time\n\nimport chromadb\nimport chromadb.utils.embedding_functions as embedding_functions\nimpo"
  },
  {
    "path": "abacus-research/mmqa-complex-demo.py",
    "chars": 27750,
    "preview": "import argparse\nimport base64\nimport json\nimport os\nimport string\nimport time\n\nimport numpy as np\nimport regex as re\n\nim"
  },
  {
    "path": "abacus-research/mmqa-demo.py",
    "chars": 25125,
    "preview": "import argparse\nimport base64\nimport json\nimport os\nimport string\nimport time\n\nimport chromadb\nimport numpy as np\nimport"
  },
  {
    "path": "abacus-research/run_ablation_study.sh",
    "chars": 1253,
    "preview": "#!/bin/bash\n\nfor seed in {0..9}\ndo\n    for priors in none naive sample\n    do\n        for sentinel in mab random\n       "
  },
  {
    "path": "abacus-research/run_biodex.sh",
    "chars": 291,
    "preview": "#!/bin/bash\n\nfor seed in {0..9}\ndo\n    echo \"Running Seed: ${seed}\"\n    exp_name=\"biodex-final-mab-k6-j4-budget150-seed$"
  },
  {
    "path": "abacus-research/run_biodex_cascades.sh",
    "chars": 2165,
    "preview": "#!/bin/bash\n\n\nfor seed in {0..9}\ndo\n  for budget in 150 300 450\n  do\n    for strategy in \"greedy\" \"pareto\"\n    do\n      "
  },
  {
    "path": "abacus-research/run_biodex_cost_threshold.sh",
    "chars": 1216,
    "preview": "#!/bin/bash\n\n\nfor cost in 1.0 2.0 4.0 8.0 999.99\ndo\n  for seed in {0..9}\n  do\n    # set variables\n    budget=450\n    k=4"
  },
  {
    "path": "abacus-research/run_biodex_min_cost_latency.sh",
    "chars": 399,
    "preview": "#!/bin/bash\n\n\nfor policy in \"mincost\" \"minlatency\"\ndo\n  for seed in {0..9}\n  do\n    # set variables\n    budget=150\n    k"
  },
  {
    "path": "abacus-research/run_biodex_priors.sh",
    "chars": 1861,
    "preview": "#!/bin/bash\n\nfor sample_budget in 10 20 50 100\ndo\n  for seed in {0..9}\n  do\n    k=0\n    j=0\n    if [[ $sample_budget -eq"
  },
  {
    "path": "abacus-research/run_biodex_priors_constrained.sh",
    "chars": 1939,
    "preview": "#!/bin/bash\n\nfor sample_budget in 10 20 50 100\ndo\n  for seed in {0..9}\n  do\n    k=0\n    j=0\n    if [[ $sample_budget -eq"
  },
  {
    "path": "abacus-research/run_cuad.sh",
    "chars": 273,
    "preview": "#!/bin/bash\n\nfor seed in {0..9}\ndo\n  policy=\"maxquality\"\n  echo \"Running Seed: ${seed} -- policy: ${policy}\"\n  exp_name="
  },
  {
    "path": "abacus-research/run_cuad_cost_threshold.sh",
    "chars": 1185,
    "preview": "#!/bin/bash\n\n\nfor cost in 1.0 2.0 4.0 8.0 999.99\ndo\n  for seed in {0..9}\n  do\n    # set variables\n    budget=300\n    k=6"
  },
  {
    "path": "abacus-research/run_cuad_min_cost_latency.sh",
    "chars": 310,
    "preview": "#!/bin/bash\n\nfor policy in \"mincost\" \"minlatency\"\ndo\n  for seed in {0..9}\n  do\n    echo \"Running Seed: ${seed}\"\n    exp_"
  },
  {
    "path": "abacus-research/run_cuad_priors.sh",
    "chars": 1810,
    "preview": "#!/bin/bash\n\nfor sample_budget in 5 10 20 50\ndo\n  for seed in {0..9}\n  do\n    k=0\n    j=0\n    if [[ $sample_budget -eq 5"
  },
  {
    "path": "abacus-research/run_cuad_priors_constrained.sh",
    "chars": 1888,
    "preview": "#!/bin/bash\n\nfor sample_budget in 5 10 20 50\ndo\n  for seed in {0..9}\n  do\n    k=0\n    j=0\n    if [[ $sample_budget -eq 5"
  },
  {
    "path": "abacus-research/run_mmqa.sh",
    "chars": 249,
    "preview": "#!/bin/bash\n\nfor seed in {0..9}\ndo\n    echo \"Running Seed: ${seed}\"\n    exp_name=\"mmqa-final-mab-k6-j4-budget150-seed${s"
  },
  {
    "path": "abacus-research/run_mmqa_complex.sh",
    "chars": 574,
    "preview": "#!/bin/bash\n\n# for seed in {0..9}\n# Lotus error'ed on seed 5 and 7, so we limit to these seeds only for a consistent com"
  },
  {
    "path": "abacus-research/run_mmqa_complex_min_cost_latency.sh",
    "chars": 1016,
    "preview": "#!/bin/bash\n\n# for seed in {0..9}\n# Lotus error'ed on seed 5 and 7, so we limit to these seeds only for a consistent com"
  },
  {
    "path": "abacus-research/run_mmqa_min_cost_latency.sh",
    "chars": 341,
    "preview": "#!/bin/bash\n\nfor policy in \"mincost\" \"minlatency\"\ndo\n    for seed in {0..9}\n    do\n        echo \"Running Seed: ${seed}\"\n"
  },
  {
    "path": "abacus-research/score_biodex.py",
    "chars": 1733,
    "preview": "import json\n\nimport numpy as np\n\n\ndef compute_final_metrics(metric: str, dir: str, exp_base_name: str):\n    qualities = "
  },
  {
    "path": "abacus-research/score_cuad.py",
    "chars": 1863,
    "preview": "import json\nimport os\n\nimport numpy as np\n\n\ndef compute_final_metrics(metric: str, dir: str, exp_base_name: str):\n    qu"
  },
  {
    "path": "abacus-research/score_mmqa.py",
    "chars": 1704,
    "preview": "import json\n\nimport numpy as np\n\n\ndef compute_final_metrics(metric: str, dir: str, exp_base_name: str):\n    qualities = "
  },
  {
    "path": "abacus-research/score_mmqa_complex.py",
    "chars": 1836,
    "preview": "import json\nimport os\n\nimport numpy as np\n\n\ndef compute_final_metrics(metric: str, dir: str, exp_base_name: str):\n    qu"
  },
  {
    "path": "abacus-research/setup_cuad_data.py",
    "chars": 2222,
    "preview": "#!/usr/bin/env python\n\"\"\"\nScript to download CUAD dataset and set up local data directory.\nThis replaces the need for Hu"
  },
  {
    "path": "demos/audio-demo.py",
    "chars": 1045,
    "preview": "import os\n\nimport kagglehub\n\nimport palimpzest as pz\n\n\nclass SmallAudioDataset(pz.AudioFileDataset):\n    def __init__(se"
  },
  {
    "path": "demos/caching-demo.py",
    "chars": 12407,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nRealistic Demo showcasing prompt caching capabilities in Palimpzest.\n\nThis demo processes mul"
  },
  {
    "path": "demos/demo_core.py",
    "chars": 9219,
    "preview": "#!/usr/bin/env python3\nimport json\nimport os\n\nimport pandas as pd\nfrom tabulate import tabulate\n\nimport palimpzest as pz"
  },
  {
    "path": "demos/enron-demo.py",
    "chars": 3680,
    "preview": "import json\nimport os\n\nimport palimpzest as pz\nfrom palimpzest.core.lib.schemas import TextFile\n\n\nclass EnronValidator(p"
  },
  {
    "path": "demos/image-demo.py",
    "chars": 2112,
    "preview": "#!/usr/bin/env python3\n\"\"\"This scripts is a demo for image processing, it is simply an abridged version of simpleDemo.py"
  },
  {
    "path": "demos/join-data/animal-texts/animal1.txt",
    "chars": 41,
    "preview": "The quick red fox jumped over the fence.\n"
  },
  {
    "path": "demos/join-data/animal-texts/animal2.txt",
    "chars": 35,
    "preview": "The black dog sat next to the bed.\n"
  },
  {
    "path": "demos/join-data/animal-texts/animal3.txt",
    "chars": 47,
    "preview": "The white polar bear swam gently in the ocean.\n"
  },
  {
    "path": "demos/join-data/animal-texts/animal4.txt",
    "chars": 76,
    "preview": "The labrador swam in the lake, the sun glistening off its shiny black coat.\n"
  },
  {
    "path": "demos/join-data/animal-texts/animal5.txt",
    "chars": 28,
    "preview": "Clifford was a big red dog.\n"
  },
  {
    "path": "demos/join-data/animal-texts/animal6.txt",
    "chars": 32,
    "preview": "The elephant was wise and grey.\n"
  },
  {
    "path": "demos/join-demo.py",
    "chars": 2842,
    "preview": "import argparse\n\nimport palimpzest as pz\n\n# define columns for datasets\ntext_animal_cols = [\n    {\"name\": \"animal\", \"typ"
  },
  {
    "path": "demos/paper-demo.py",
    "chars": 13173,
    "preview": "import argparse\nimport json\nimport os\n\nimport gradio as gr\nimport numpy as np\nimport pandas as pd\nfrom PIL import Image\n"
  },
  {
    "path": "demos/real-estate-demo.py",
    "chars": 9745,
    "preview": "import argparse\nimport os\n\nimport gradio as gr\nimport numpy as np\nimport pandas as pd\nfrom PIL import Image\n\nimport pali"
  },
  {
    "path": "demos/simple-demo.py",
    "chars": 2738,
    "preview": "#!/usr/bin/env python3\nimport argparse\nimport os\nimport time\n\nfrom demo_core import execute_task, format_results_table\nf"
  },
  {
    "path": "demos/vllm-demo.py",
    "chars": 1967,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nMinimal demo for running a vLLM model with Palimpzest.\n\nPrerequisites:\n  1. Start a vLLM serv"
  },
  {
    "path": "evals/quest/eval.py",
    "chars": 4838,
    "preview": "import argparse\nimport copy\nimport json\nimport os\nimport random\nimport time\n\nimport palimpzest as pz\n\n\ndef prepare_docs_"
  },
  {
    "path": "pyproject.toml",
    "chars": 2339,
    "preview": "[project]\nname = \"palimpzest\"\nversion = \"1.5.3\"\ndescription = \"Palimpzest is a system which enables anyone to process AI"
  },
  {
    "path": "quickstart.ipynb",
    "chars": 39334,
    "preview": "{\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"dBfyB-7Hytwy\"\n      },\n      \"sou"
  },
  {
    "path": "ruff.toml",
    "chars": 261,
    "preview": "# Config https://docs.astral.sh/ruff/configuration/\nline-length = 120\nindent-width = 4\nexclude = [\"*.ipynb\"]\n\n# Assume P"
  },
  {
    "path": "scripts/capture_litellm_stats.py",
    "chars": 20047,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nScript to invoke LLM providers through LiteLLM and capture token/cost statistics.\n\nThis scrip"
  },
  {
    "path": "scripts/capture_provider_stats.py",
    "chars": 30879,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nScript to directly invoke LLM providers and capture token/cost statistics.\n\nThis script:\n1. L"
  },
  {
    "path": "scripts/generate_test_messages.py",
    "chars": 15580,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nScript to generate test messages for each provider/modality combination.\n\nThis script uses th"
  },
  {
    "path": "scripts/update_model_info.py",
    "chars": 21927,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nScript to automatically update pz_models_information.json with data from external sources.\n\nD"
  },
  {
    "path": "src/palimpzest/__init__.py",
    "chars": 1694,
    "preview": "import logging\n\nfrom palimpzest.constants import Cardinality, Model\nfrom palimpzest.core.data.context import Context, Te"
  },
  {
    "path": "src/palimpzest/agents/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/agents/compute_agents.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/agents/search_agents.py",
    "chars": 29418,
    "preview": "import json\nimport textwrap\nimport time\nfrom collections.abc import Generator\nfrom typing import TYPE_CHECKING, Any\n\nfro"
  },
  {
    "path": "src/palimpzest/constants.py",
    "chars": 18567,
    "preview": "### This file contains constants used by Palimpzest ###\nfrom __future__ import annotations\n\nimport os\nfrom enum import E"
  },
  {
    "path": "src/palimpzest/core/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/core/data/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/core/data/context.py",
    "chars": 16207,
    "preview": "from __future__ import annotations\n\nimport os\nimport re\nfrom abc import ABC\nfrom typing import Callable\n\nfrom pydantic i"
  },
  {
    "path": "src/palimpzest/core/data/context_manager.py",
    "chars": 6150,
    "preview": "from __future__ import annotations\n\nimport os\nimport pickle\n\nimport chromadb\nimport chromadb.utils.embedding_functions a"
  },
  {
    "path": "src/palimpzest/core/data/dataset.py",
    "chars": 31915,
    "preview": "from __future__ import annotations\n\nimport warnings\nfrom collections.abc import Iterator\nfrom typing import Callable\n\nfr"
  },
  {
    "path": "src/palimpzest/core/data/index_dataset.py",
    "chars": 1929,
    "preview": "from __future__ import annotations\n\nfrom abc import ABC, abstractmethod\n\nfrom chromadb.api.models.Collection import Coll"
  },
  {
    "path": "src/palimpzest/core/data/iter_dataset.py",
    "chars": 20507,
    "preview": "from __future__ import annotations\n\nimport base64\nimport os\nfrom abc import ABC, abstractmethod\nfrom io import BytesIO\nf"
  },
  {
    "path": "src/palimpzest/core/elements/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/core/elements/filters.py",
    "chars": 1642,
    "preview": "from __future__ import annotations\n\nfrom typing import Any, Callable\n\n\n#############################\n# Filters that can "
  },
  {
    "path": "src/palimpzest/core/elements/groupbysig.py",
    "chars": 2955,
    "preview": "from __future__ import annotations\n\nfrom typing import Any\n\nfrom pydantic import BaseModel\n\nfrom palimpzest.core.lib.sch"
  },
  {
    "path": "src/palimpzest/core/elements/records.py",
    "chars": 17967,
    "preview": "from __future__ import annotations\n\nimport json\nfrom collections.abc import Generator\nfrom copy import deepcopy\nfrom typ"
  },
  {
    "path": "src/palimpzest/core/lib/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/core/lib/schemas.py",
    "chars": 10413,
    "preview": "from __future__ import annotations\n\nimport sys\nfrom typing import Any, TypeAliasType\n\nimport pandas as pd\nfrom pydantic "
  },
  {
    "path": "src/palimpzest/core/models.py",
    "chars": 43991,
    "preview": "from __future__ import annotations\n\nimport json\nimport time\nfrom abc import abstractmethod\nfrom typing import Any\n\nfrom "
  },
  {
    "path": "src/palimpzest/policy.py",
    "chars": 11628,
    "preview": "from __future__ import annotations\n\nimport json\n\nfrom palimpzest.core.models import PlanCost\n\n\ndef construct_policy_from"
  },
  {
    "path": "src/palimpzest/prompts/__init__.py",
    "chars": 1728,
    "preview": "from palimpzest.prompts.agent_prompts import (\n    CODE_AGENT_SYSTEM_PROMPT,\n    DATA_DISCOVERY_AGENT_INITIAL_PLAN_PROMP"
  },
  {
    "path": "src/palimpzest/prompts/agent_prompts.py",
    "chars": 17459,
    "preview": "CODE_AGENT_SYSTEM_PROMPT = \"\"\"You are an expert assistant who can solve any task using code blobs. You will be given a t"
  },
  {
    "path": "src/palimpzest/prompts/aggregate_prompts.py",
    "chars": 3464,
    "preview": "\"\"\"This file contains prompts for aggregation operations.\"\"\"\n\n### BASE PROMPTS ###\nAGG_BASE_SYSTEM_PROMPT = \"\"\"You are a"
  },
  {
    "path": "src/palimpzest/prompts/context_search.py",
    "chars": 357,
    "preview": "\nCONTEXT_SEARCH_PROMPT = \"\"\"You are a helpful agent whose job is to propose search queries that will assist in finding i"
  },
  {
    "path": "src/palimpzest/prompts/convert_prompts.py",
    "chars": 3158,
    "preview": "\"\"\"This file contains prompts for convert operations.\"\"\"\n\n### BASE PROMPTS ###\nMAP_BASE_SYSTEM_PROMPT = \"\"\"You are a hel"
  },
  {
    "path": "src/palimpzest/prompts/critique_and_refine_prompts.py",
    "chars": 2857,
    "preview": "\"\"\"This file contains prompts for CritiqueAndRefineConvert operations.\"\"\"\n\n### CRITIQUE PROMPT AND CRITERIA ###\nBASE_CRI"
  },
  {
    "path": "src/palimpzest/prompts/filter_prompts.py",
    "chars": 2374,
    "preview": "\"\"\"This file contains prompts for filter operations.\"\"\"\n\n### BASE PROMPTS ###\nFILTER_BASE_SYSTEM_PROMPT = \"\"\"You are a h"
  },
  {
    "path": "src/palimpzest/prompts/join_prompts.py",
    "chars": 2952,
    "preview": "\"\"\"This file contains prompts for join operations.\"\"\"\n\n### BASE PROMPTS ###\nJOIN_BASE_SYSTEM_PROMPT = \"\"\"You are a helpf"
  },
  {
    "path": "src/palimpzest/prompts/moa_aggregator_prompts.py",
    "chars": 5284,
    "preview": "\"\"\"This file contains prompts for Mixture-of-Agents aggregator operations.\"\"\"\n\n### SYSTEM PROMPTS ###\nMAP_MOA_AGG_BASE_S"
  },
  {
    "path": "src/palimpzest/prompts/moa_proposer_prompts.py",
    "chars": 3161,
    "preview": "\"\"\"This file contains prompts for MixtureOfAgentsConvert operations.\"\"\"\n\n### SYSTEM PROMPTS ###\nMAP_MOA_PROPOSER_BASE_SY"
  },
  {
    "path": "src/palimpzest/prompts/prompt_factory.py",
    "chars": 51311,
    "preview": "\"\"\"This file contains factory methods which return template prompts and return messages for chat payloads.\"\"\"\n\nimport ba"
  },
  {
    "path": "src/palimpzest/prompts/prompt_manager.py",
    "chars": 10067,
    "preview": "\"\"\"\nPrompt caching utility for different LLM providers.\n\nThis module provides provider-specific prompt caching configura"
  },
  {
    "path": "src/palimpzest/prompts/split_merge_prompts.py",
    "chars": 5609,
    "preview": "\"\"\"This file contains prompts for SplitConvert aggregator operations.\"\"\"\n\n### SYSTEM PROMPTS ###\nMAP_SPLIT_MERGER_BASE_S"
  },
  {
    "path": "src/palimpzest/prompts/split_proposer_prompts.py",
    "chars": 3151,
    "preview": "\"\"\"This file contains prompts for SplitAndMerge operations.\"\"\"\n\n### SYSTEM PROMPTS ###\nMAP_SPLIT_PROPOSER_BASE_SYSTEM_PR"
  },
  {
    "path": "src/palimpzest/prompts/utils.py",
    "chars": 7497,
    "preview": "\"\"\"This file contains utility format strings which are templated into many of our prompts.\"\"\"\n\n### FORMATTING INSTRUCTIO"
  },
  {
    "path": "src/palimpzest/prompts/validator.py",
    "chars": 10508,
    "preview": "### MAP ###\nMAP_VALIDATOR_PROMPT = \"\"\"You are an intelligent judge whose job is to evaluate how successfully an agent ex"
  },
  {
    "path": "src/palimpzest/query/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/query/execution/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/query/execution/all_sample_execution_strategy.py",
    "chars": 14369,
    "preview": "import logging\n\nimport numpy as np\n\nfrom palimpzest.core.data.dataset import Dataset\nfrom palimpzest.core.elements.recor"
  },
  {
    "path": "src/palimpzest/query/execution/execution_strategy.py",
    "chars": 19248,
    "preview": "import logging\nfrom abc import ABC, abstractmethod\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\n\nimpo"
  },
  {
    "path": "src/palimpzest/query/execution/execution_strategy_type.py",
    "chars": 987,
    "preview": "from enum import Enum\n\nfrom palimpzest.query.execution.all_sample_execution_strategy import AllSamplingExecutionStrategy"
  },
  {
    "path": "src/palimpzest/query/execution/mab_execution_strategy.py",
    "chars": 48417,
    "preview": "\nimport logging\n\nimport numpy as np\nfrom chromadb.api.models.Collection import Collection\n\nfrom palimpzest.core.data.dat"
  },
  {
    "path": "src/palimpzest/query/execution/parallel_execution_strategy.py",
    "chars": 17589,
    "preview": "import logging\nfrom concurrent.futures import ThreadPoolExecutor, wait\n\nfrom palimpzest.constants import PARALLEL_EXECUT"
  },
  {
    "path": "src/palimpzest/query/execution/single_threaded_execution_strategy.py",
    "chars": 17464,
    "preview": "import logging\n\nfrom palimpzest.core.elements.records import DataRecord\nfrom palimpzest.core.models import PlanStats\nfro"
  },
  {
    "path": "src/palimpzest/query/generators/__init__.py",
    "chars": 203,
    "preview": "from palimpzest.query.generators.gemini_client import GeminiClient, GeminiResponse\nfrom palimpzest.query.generators.gene"
  },
  {
    "path": "src/palimpzest/query/generators/gemini_client.py",
    "chars": 11810,
    "preview": "\"\"\"\nDirect client for Gemini (Google AI Studio and Vertex AI) that bypasses litellm.\n\nThis module provides a GeminiClien"
  },
  {
    "path": "src/palimpzest/query/generators/generators.py",
    "chars": 24964,
    "preview": "\"\"\"\nThis file contains the Generator classes and generator factory.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport json"
  },
  {
    "path": "src/palimpzest/query/operators/__init__.py",
    "chars": 4734,
    "preview": "from palimpzest.query.operators.aggregate import AggregateOp as _AggregateOp\nfrom palimpzest.query.operators.aggregate i"
  },
  {
    "path": "src/palimpzest/query/operators/aggregate.py",
    "chars": 27443,
    "preview": "from __future__ import annotations\n\nimport contextlib\nimport time\nfrom typing import Any\n\nfrom palimpzest.constants impo"
  },
  {
    "path": "src/palimpzest/query/operators/compute.py",
    "chars": 8564,
    "preview": "import functools\nimport inspect\nimport os\nimport time\nfrom typing import Any\n\nfrom smolagents import CodeAgent, LiteLLMM"
  },
  {
    "path": "src/palimpzest/query/operators/convert.py",
    "chars": 16160,
    "preview": "from __future__ import annotations\n\nimport time\nfrom abc import ABC, abstractmethod\nfrom typing import Callable\n\nfrom py"
  },
  {
    "path": "src/palimpzest/query/operators/critique_and_refine.py",
    "chars": 8886,
    "preview": "from __future__ import annotations\n\nfrom typing import Any\n\nfrom pydantic.fields import FieldInfo\n\nfrom palimpzest.const"
  },
  {
    "path": "src/palimpzest/query/operators/distinct.py",
    "chars": 2614,
    "preview": "from __future__ import annotations\n\nfrom palimpzest.core.elements.records import DataRecord, DataRecordSet\nfrom palimpze"
  },
  {
    "path": "src/palimpzest/query/operators/filter.py",
    "chars": 10466,
    "preview": "from __future__ import annotations\n\nimport time\nfrom abc import ABC, abstractmethod\nfrom typing import Any\n\nfrom pydanti"
  },
  {
    "path": "src/palimpzest/query/operators/join.py",
    "chars": 38612,
    "preview": "from __future__ import annotations\n\nimport threading\nimport time\nfrom abc import ABC, abstractmethod\nfrom concurrent.fut"
  },
  {
    "path": "src/palimpzest/query/operators/limit.py",
    "chars": 2130,
    "preview": "from __future__ import annotations\n\nfrom palimpzest.core.elements.records import DataRecord, DataRecordSet\nfrom palimpze"
  },
  {
    "path": "src/palimpzest/query/operators/logical.py",
    "chars": 21160,
    "preview": "from __future__ import annotations\n\nimport json\nfrom typing import Callable\n\nfrom pydantic import BaseModel\n\nfrom palimp"
  },
  {
    "path": "src/palimpzest/query/operators/mixture_of_agents.py",
    "chars": 11578,
    "preview": "from __future__ import annotations\n\nfrom pydantic.fields import FieldInfo\n\nfrom palimpzest.constants import Cardinality,"
  },
  {
    "path": "src/palimpzest/query/operators/physical.py",
    "chars": 9664,
    "preview": "from __future__ import annotations\n\nimport json\n\nfrom pydantic import BaseModel\n\nfrom palimpzest.constants import Modali"
  },
  {
    "path": "src/palimpzest/query/operators/project.py",
    "chars": 2111,
    "preview": "from __future__ import annotations\n\nfrom palimpzest.core.elements.records import DataRecord, DataRecordSet\nfrom palimpze"
  },
  {
    "path": "src/palimpzest/query/operators/rag.py",
    "chars": 20116,
    "preview": "from __future__ import annotations\n\nimport time\nfrom typing import Any\n\nfrom litellm import embedding as litellm_embeddi"
  },
  {
    "path": "src/palimpzest/query/operators/scan.py",
    "chars": 7407,
    "preview": "from __future__ import annotations\n\nimport time\nfrom abc import ABC, abstractmethod\nfrom typing import Any\n\nfrom palimpz"
  },
  {
    "path": "src/palimpzest/query/operators/search.py",
    "chars": 22843,
    "preview": "import functools\nimport inspect\nimport os\nimport time\nfrom typing import Any\n\n# from mem0 import Memory\nfrom smolagents "
  },
  {
    "path": "src/palimpzest/query/operators/split.py",
    "chars": 14902,
    "preview": "from __future__ import annotations\n\nimport math\n\nfrom pydantic.fields import FieldInfo\n\nfrom palimpzest.constants import"
  },
  {
    "path": "src/palimpzest/query/operators/topk.py",
    "chars": 13843,
    "preview": "from __future__ import annotations\n\nimport os\nimport threading\nimport time\nfrom typing import Callable\n\nfrom chromadb.ap"
  },
  {
    "path": "src/palimpzest/query/optimizer/__init__.py",
    "chars": 2799,
    "preview": "from palimpzest.query.optimizer.rules import AddContextsBeforeComputeRule as _AddContextsBeforeComputeRule\nfrom palimpze"
  },
  {
    "path": "src/palimpzest/query/optimizer/cost_model.py",
    "chars": 12956,
    "preview": "from __future__ import annotations\n\nimport logging\nimport warnings\n\nimport pandas as pd\n\nfrom palimpzest.constants impor"
  },
  {
    "path": "src/palimpzest/query/optimizer/optimizer.py",
    "chars": 19288,
    "preview": "from __future__ import annotations\n\nimport logging\nfrom copy import deepcopy\n\nfrom pydantic.fields import FieldInfo\n\nfro"
  },
  {
    "path": "src/palimpzest/query/optimizer/optimizer_strategy.py",
    "chars": 10042,
    "preview": "from __future__ import annotations\n\nimport logging\nfrom abc import ABC, abstractmethod\n\nfrom palimpzest.policy import Po"
  },
  {
    "path": "src/palimpzest/query/optimizer/optimizer_strategy_type.py",
    "chars": 1084,
    "preview": "from enum import Enum\n\nfrom palimpzest.query.optimizer.optimizer_strategy import (\n    GreedyStrategy,\n    NoOptimizatio"
  },
  {
    "path": "src/palimpzest/query/optimizer/plan.py",
    "chars": 22792,
    "preview": "from __future__ import annotations\n\nfrom abc import ABC, abstractmethod\n\nfrom palimpzest.core.models import PlanCost\nfro"
  },
  {
    "path": "src/palimpzest/query/optimizer/primitives.py",
    "chars": 5445,
    "preview": "from __future__ import annotations\n\nfrom pydantic.fields import FieldInfo\n\nfrom palimpzest.query.operators.logical impor"
  },
  {
    "path": "src/palimpzest/query/optimizer/rules.py",
    "chars": 56278,
    "preview": "import logging\nimport os\nfrom copy import deepcopy\nfrom itertools import combinations, product\n\nfrom palimpzest.constant"
  },
  {
    "path": "src/palimpzest/query/optimizer/tasks.py",
    "chars": 30455,
    "preview": "from __future__ import annotations\n\nimport logging\nfrom typing import Any\n\nfrom palimpzest.core.models import PlanCost\nf"
  },
  {
    "path": "src/palimpzest/query/processor/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/query/processor/config.py",
    "chars": 2893,
    "preview": "from __future__ import annotations\n\nfrom pydantic import BaseModel, ConfigDict, Field\n\nfrom palimpzest.constants import "
  },
  {
    "path": "src/palimpzest/query/processor/query_processor.py",
    "chars": 6281,
    "preview": "import logging\n\nfrom palimpzest.constants import Model\nfrom palimpzest.core.data.dataset import Dataset\nfrom palimpzest."
  },
  {
    "path": "src/palimpzest/query/processor/query_processor_factory.py",
    "chars": 12101,
    "preview": "import logging\nimport os\nfrom enum import Enum\n\nfrom dotenv import load_dotenv\n\nfrom palimpzest.constants import Model\nf"
  },
  {
    "path": "src/palimpzest/schemabuilder/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/schemabuilder/schema_builder.py",
    "chars": 7968,
    "preview": "\"\"\"\nThis module is responsible for building schemas dynamically, taking an input file and generating a schema for it.\nTh"
  },
  {
    "path": "src/palimpzest/tools/README.md",
    "chars": 483,
    "preview": "# Palimpsest Tools Catalog\nThis is a directory of tools we have for the palimpzest.\n\n## Tools\nFor the pdf precessing too"
  },
  {
    "path": "src/palimpzest/tools/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/tools/allenpdf.py",
    "chars": 1690,
    "preview": "import modal\n\napp = modal.App(\"palimpzest.tools\")\npip_packs = [\n    \"papermage\",\n    \"tqdm\",\n    \"transformers\",\n    \"pd"
  },
  {
    "path": "src/palimpzest/tools/pdfparser.py",
    "chars": 9570,
    "preview": "import hashlib\nimport io\nimport json\nimport os\nimport time\nfrom typing import BinaryIO\nfrom zipfile import ZipFile\n\nimpo"
  },
  {
    "path": "src/palimpzest/tools/skema_tools.py",
    "chars": 669,
    "preview": "#####################################################\n#\n#####################################################\n# Descript"
  },
  {
    "path": "src/palimpzest/utils/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/utils/env_helpers.py",
    "chars": 334,
    "preview": "import os\nimport sys\n\n\ndef load_env():\n    sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), \"."
  },
  {
    "path": "src/palimpzest/utils/hash_helpers.py",
    "chars": 334,
    "preview": "import hashlib\nimport json\n\nfrom palimpzest.constants import MAX_ID_CHARS\n\n\ndef hash_for_id(id_str: str, max_chars: int "
  },
  {
    "path": "src/palimpzest/utils/model_helpers.py",
    "chars": 9418,
    "preview": "import os\n\nfrom palimpzest.constants import MAX_AVAILABLE_MODELS, Model\nfrom palimpzest.core.models import PlanCost\nfrom"
  },
  {
    "path": "src/palimpzest/utils/model_info_helpers.py",
    "chars": 15988,
    "preview": "import logging\nimport re\nfrom typing import Any\n\nimport requests\n\nlogger = logging.getLogger(__name__)\n\nPZ_MODEL_DATA_UR"
  },
  {
    "path": "src/palimpzest/utils/progress.py",
    "chars": 22078,
    "preview": "import time\nfrom abc import ABC, abstractmethod\nfrom dataclasses import dataclass\n\nfrom chromadb.api.models.Collection i"
  },
  {
    "path": "src/palimpzest/utils/pz_models_information.json",
    "chars": 34352,
    "preview": "{\n    \"together_ai/meta-llama/Llama-3.2-3B-Instruct-Turbo\": {\n        \"usd_per_input_token\": 6e-08,\n        \"usd_per_out"
  },
  {
    "path": "src/palimpzest/utils/udfs.py",
    "chars": 1854,
    "preview": "\"\"\"\nThis file collects a sample of useful UDFs to convert schemata.\n\"\"\"\n\nimport io\nfrom datetime import datetime\n\nimport"
  },
  {
    "path": "src/palimpzest/validator/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/palimpzest/validator/validator.py",
    "chars": 16453,
    "preview": "import json\nimport time\n\nimport litellm\n\n# from colorama import Fore, Style\nfrom palimpzest.constants import Cardinality"
  },
  {
    "path": "testdata/README.md",
    "chars": 810,
    "preview": "## Note About Datasets Used in Evaluation\nEnron is run using the `enron-eval` dataset\n\nReal Estate is run using the `rea"
  },
  {
    "path": "testdata/download-testdata.sh",
    "chars": 3233,
    "preview": "#!/bin/bash\n#This script can be used to download and extract the test data for the palimpzest demos\n# Usage: bash testda"
  },
  {
    "path": "testdata/enron-eval-medium-labels.json",
    "chars": 8534,
    "preview": "{\"allen-p-inbox-45.txt\": [], \"crandell-s-inbox-241.txt\": [], \"blair-l-inbox-267.txt\": [], \"forney-j-inbox-149.txt\": [], "
  },
  {
    "path": "testdata/target_matching.csv",
    "chars": 2324,
    "preview": "target_attribute,li,cao,clark,dou,gilette,huang,krug,mcdermott,satpathy,vasaikar,wang\r\ncase_submitter_id,Patient_ID,case"
  },
  {
    "path": "tests/pytest/README.md",
    "chars": 388,
    "preview": "## Testing with Pytest in Palimpzest\n- tests in `test_*.py` files\n- fixtures in `conftest.py` and `fixtures/` (these are"
  },
  {
    "path": "tests/pytest/conftest.py",
    "chars": 9687,
    "preview": "import pytest\n\nfrom palimpzest.policy import MaxQuality, MaxQualityAtFixedCost, MinCost, MinCostAtFixedQuality\n\npytest_p"
  },
  {
    "path": "tests/pytest/data/email_schema.json",
    "chars": 436,
    "preview": "{\n    \"name\": \"Email\",\n    \"type\": \"TextFile\",\n    \"description\": \"Represents an email, which in practice is usually fro"
  },
  {
    "path": "tests/pytest/data/email_schema.yml",
    "chars": 266,
    "preview": "schema:\n  name: Email\n  type: TextFile\n  description: Represents an email, which in practice is usually from a text file"
  },
  {
    "path": "tests/pytest/data/synapse_schema.csv",
    "chars": 812,
    "preview": "resourceType,dataType,dataSubtype,individualID,specimenID,cellType,assay,isCellLine,diagnosis,organ,platform,sex,species"
  },
  {
    "path": "tests/pytest/data/synapse_schema.jsonld",
    "chars": 1248838,
    "preview": "{\n    \"@context\": {\n        \"bts\": \"http://schema.biothings.io/\",\n        \"rdf\": \"http://www.w3.org/1999/02/22-rdf-synta"
  },
  {
    "path": "tests/pytest/fixtures/champion_outputs.py",
    "chars": 8240,
    "preview": "import pytest\n\nfrom palimpzest.constants import Model\nfrom palimpzest.core.elements.records import DataRecord, DataRecor"
  },
  {
    "path": "tests/pytest/fixtures/datasets.py",
    "chars": 3072,
    "preview": "import os\nfrom pathlib import Path\n\nimport pytest\n\nfrom palimpzest.core.data.iter_dataset import IterDataset, TextFileDa"
  },
  {
    "path": "tests/pytest/fixtures/execution_data.py",
    "chars": 19397,
    "preview": "import pytest\n\nfrom palimpzest.constants import Model\nfrom palimpzest.core.elements.records import DataRecord, DataRecor"
  },
  {
    "path": "tests/pytest/fixtures/expected_physical_plans.py",
    "chars": 17011,
    "preview": "from copy import deepcopy\n\nimport pytest\n\nfrom palimpzest.constants import Model\nfrom palimpzest.core.elements.filters i"
  },
  {
    "path": "tests/pytest/fixtures/expected_qualities.py",
    "chars": 12282,
    "preview": "import pytest\n\nfrom palimpzest.constants import Model\n\n\n# NOTE: this relies on knowledge of the fixtures in fixtures/exe"
  },
  {
    "path": "tests/pytest/fixtures/expected_records.py",
    "chars": 5710,
    "preview": "import os\n\nimport pytest\n\nfrom palimpzest.constants import Model\nfrom palimpzest.core.elements.records import DataRecord"
  },
  {
    "path": "tests/pytest/fixtures/models.py",
    "chars": 244,
    "preview": "from os import getenv\n\nimport pytest\n\nfrom palimpzest.constants import Model\n\n\n@pytest.fixture\ndef embedding_text_only_m"
  },
  {
    "path": "tests/pytest/fixtures/operator_to_stats.py",
    "chars": 34861,
    "preview": "from copy import deepcopy\n\nimport pytest\n\nfrom palimpzest.constants import Model\nfrom palimpzest.core.elements.filters i"
  },
  {
    "path": "tests/pytest/fixtures/physical_plans.py",
    "chars": 6522,
    "preview": "import pytest\n\nfrom palimpzest.constants import Cardinality, Model\nfrom palimpzest.core.data.iter_dataset import MemoryD"
  },
  {
    "path": "tests/pytest/fixtures/schemas.py",
    "chars": 4434,
    "preview": "from typing import Any\n\nimport pytest\nfrom pydantic import BaseModel, Field\n\nfrom palimpzest.core.lib.schemas import Ima"
  },
  {
    "path": "tests/pytest/fixtures/side_effects.py",
    "chars": 3197,
    "preview": "import pytest\n\nfrom palimpzest.core.models import GenerationStats\n\n\n### Side-Effects for Mocking LLM Calls ###\n@pytest.f"
  },
  {
    "path": "tests/pytest/fixtures/workloads.py",
    "chars": 3849,
    "preview": "import pytest\n\n\n### UDFs ###\ndef within_two_miles_of_mit(record):\n    # NOTE: I'm using this hard-coded function so that"
  },
  {
    "path": "tests/pytest/test_aggregate.py",
    "chars": 4938,
    "preview": "\"\"\"This script contains tests for physical operators for semantic aggregation.\"\"\"\n\nimport os\n\nimport pytest\nfrom pydanti"
  },
  {
    "path": "tests/pytest/test_convert.py",
    "chars": 1876,
    "preview": "\"\"\"This testing class is an integration test suite.\nWhat it does is consider one of the demo scenarios and test whether "
  }
]

// ... and 50 more files (download for full content)

About this extraction

This page contains the full source code of the mitdbg/palimpzest GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 250 files (3.6 MB), approximately 957.3k tokens, and a symbol index with 1755 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo