Repository: powdr-labs/powdr Branch: main Commit: 70a2ad887478 Files: 308 Total size: 30.0 MB Directory structure: gitextract_74kaprsg/ ├── .config/ │ └── nextest.toml ├── .gitattributes ├── .github/ │ ├── actions/ │ │ ├── init-testing-instance/ │ │ │ └── action.yml │ │ ├── init-testing-instance-gpu/ │ │ │ └── action.yml │ │ └── patch-openvm-reth-benchmark/ │ │ └── action.yml │ ├── runner/ │ │ └── Dockerfile │ └── workflows/ │ ├── build-cache.yml │ ├── dead-links.yml │ ├── nightly-analyze.yml │ ├── nightly-tests.yml │ ├── post-merge-tests.yml │ ├── pr-tests-with-secrets.yml │ └── pr-tests.yml ├── .gitignore ├── CLAUDE.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── autoprecompile-analyzer/ │ ├── Claude.md │ └── index.html ├── autoprecompiles/ │ ├── Cargo.toml │ ├── benches/ │ │ └── optimizer_benchmark.rs │ ├── scripts/ │ │ ├── plot_effectiveness.py │ │ ├── rank_apc_candidates.py │ │ ├── readme.md │ │ └── requirements.txt │ ├── src/ │ │ ├── adapter.rs │ │ ├── blocks/ │ │ │ ├── detection.rs │ │ │ └── mod.rs │ │ ├── bus_map.rs │ │ ├── constraint_optimizer.rs │ │ ├── empirical_constraints.rs │ │ ├── equivalence_classes.rs │ │ ├── evaluation.rs │ │ ├── execution/ │ │ │ ├── ast.rs │ │ │ ├── candidates.rs │ │ │ ├── evaluator.rs │ │ │ └── mod.rs │ │ ├── execution_profile.rs │ │ ├── export.rs │ │ ├── expression.rs │ │ ├── expression_conversion.rs │ │ ├── lib.rs │ │ ├── low_degree_bus_interaction_optimizer.rs │ │ ├── memory_optimizer.rs │ │ ├── optimistic/ │ │ │ ├── algebraic_references.rs │ │ │ ├── config.rs │ │ │ ├── execution_constraint_generator.rs │ │ │ ├── execution_literals.rs │ │ │ └── mod.rs │ │ ├── optimizer.rs │ │ ├── optimizer_documentation.md │ │ ├── pgo/ │ │ │ ├── cell/ │ │ │ │ ├── mod.rs │ │ │ │ └── selection.rs │ │ │ ├── instruction.rs │ │ │ ├── mod.rs │ │ │ └── none.rs │ │ ├── powdr.rs │ │ ├── range_constraint_optimizer.rs │ │ ├── stats_logger.rs │ │ ├── symbolic_machine.rs │ │ ├── symbolic_machine_generator.rs │ │ └── trace_handler.rs 
│ └── tests/ │ └── optimizer.rs ├── cli-openvm-riscv/ │ ├── Cargo.toml │ ├── README.md │ └── src/ │ └── main.rs ├── constraint-solver/ │ ├── Cargo.toml │ ├── src/ │ │ ├── algebraic_constraint/ │ │ │ ├── mod.rs │ │ │ └── solve.rs │ │ ├── bus_interaction_handler.rs │ │ ├── constraint_system.rs │ │ ├── effect.rs │ │ ├── grouped_expression.rs │ │ ├── indexed_constraint_system.rs │ │ ├── inliner.rs │ │ ├── lib.rs │ │ ├── range_constraint.rs │ │ ├── reachability.rs │ │ ├── rule_based_optimizer/ │ │ │ ├── driver.rs │ │ │ ├── environment.rs │ │ │ ├── item_db.rs │ │ │ ├── mod.rs │ │ │ ├── new_var_generator.rs │ │ │ ├── rules.rs │ │ │ ├── tests.rs │ │ │ └── types.rs │ │ ├── runtime_constant.rs │ │ ├── solver/ │ │ │ ├── base.rs │ │ │ ├── boolean_extractor.rs │ │ │ ├── constraint_splitter.rs │ │ │ ├── exhaustive_search.rs │ │ │ ├── linearizer.rs │ │ │ └── var_transformation.rs │ │ ├── solver.rs │ │ ├── symbolic_expression.rs │ │ ├── system_splitter.rs │ │ ├── test_utils.rs │ │ ├── utils.rs │ │ └── variable_update.rs │ └── tests/ │ └── solver.rs ├── expression/ │ ├── Cargo.toml │ └── src/ │ ├── display.rs │ ├── lib.rs │ └── visitors.rs ├── isa-utils/ │ ├── Cargo.toml │ └── src/ │ └── lib.rs ├── number/ │ ├── Cargo.toml │ └── src/ │ ├── baby_bear.rs │ ├── bn254.rs │ ├── expression_convertible.rs │ ├── goldilocks.rs │ ├── koala_bear.rs │ ├── lib.rs │ ├── macros.rs │ ├── mersenne31.rs │ ├── plonky3_macros.rs │ ├── serialize.rs │ └── traits.rs ├── openvm/ │ ├── Cargo.toml │ ├── build.rs │ ├── cuda/ │ │ └── src/ │ │ ├── apc_apply_bus.cu │ │ ├── apc_tracegen.cu │ │ └── expr_eval.cuh │ ├── metrics-viewer/ │ │ ├── CLAUDE.md │ │ ├── index.html │ │ └── spec.py │ └── src/ │ ├── air_builder.rs │ ├── cuda_abi.rs │ ├── customize_exe.rs │ ├── empirical_constraints.rs │ ├── extraction_utils.rs │ ├── isa.rs │ ├── lib.rs │ ├── powdr_extension/ │ │ ├── chip.rs │ │ ├── executor/ │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── opcode.rs │ │ ├── trace_generator/ │ │ │ ├── common.rs │ │ │ ├── cpu/ │ │ │ │ 
├── inventory.rs │ │ │ │ ├── mod.rs │ │ │ │ └── periphery.rs │ │ │ ├── cuda/ │ │ │ │ ├── inventory.rs │ │ │ │ ├── mod.rs │ │ │ │ └── periphery.rs │ │ │ └── mod.rs │ │ └── vm.rs │ ├── program.rs │ ├── test_utils.rs │ ├── trace_generation.rs │ └── utils.rs ├── openvm-bus-interaction-handler/ │ ├── Cargo.toml │ └── src/ │ ├── bitwise_lookup.rs │ ├── bus_map.rs │ ├── lib.rs │ ├── memory.rs │ ├── memory_bus_interaction.rs │ ├── tuple_range_checker.rs │ └── variable_range_checker.rs ├── openvm-riscv/ │ ├── .gitignore │ ├── Cargo.toml │ ├── extensions/ │ │ ├── hints-circuit/ │ │ │ ├── Cargo.toml │ │ │ └── src/ │ │ │ ├── executors.rs │ │ │ ├── field10x26_k256.rs │ │ │ └── lib.rs │ │ ├── hints-guest/ │ │ │ ├── Cargo.toml │ │ │ └── src/ │ │ │ └── lib.rs │ │ └── hints-transpiler/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── guest/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-ecc-manual/ │ │ ├── Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-ecc-powdr-affine-hint/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-ecc-projective/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-ecrecover/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-ecrecover-manual/ │ │ ├── Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-hints-test/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-keccak/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-keccak-manual-precompile/ │ │ ├── Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-matmul/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-pairing/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-pairing-manual-precompile/ │ │ ├── Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-sha256/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-sha256-manual-precompile/ │ │ ├── Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-u256/ │ │ ├── 
Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── guest-u256-manual-precompile/ │ │ ├── Cargo.toml │ │ ├── openvm.toml │ │ └── src/ │ │ └── main.rs │ ├── scripts/ │ │ ├── basic_metrics.py │ │ ├── generate_bench_results_readme.py │ │ ├── metrics_utils.py │ │ ├── plot_trace_cells.py │ │ ├── readme.md │ │ ├── requirements.txt │ │ └── run_guest_benches.sh │ ├── src/ │ │ ├── isa/ │ │ │ ├── instruction_formatter.rs │ │ │ ├── mod.rs │ │ │ ├── opcode.rs │ │ │ ├── symbolic_instruction_builder.rs │ │ │ └── trace_generator/ │ │ │ ├── common.rs │ │ │ ├── cpu.rs │ │ │ ├── cuda.rs │ │ │ └── mod.rs │ │ └── lib.rs │ └── tests/ │ ├── apc_builder_complex.rs │ ├── apc_builder_pseudo_instructions.rs │ ├── apc_builder_single_instructions.rs │ ├── apc_builder_superblocks.rs │ ├── apc_reth_op_bug.cbor │ ├── apc_snapshots/ │ │ ├── complex/ │ │ │ ├── aligned_memcpy.txt │ │ │ ├── copy_byte.txt │ │ │ ├── guest_top_block.txt │ │ │ ├── load_two_bytes_compare.txt │ │ │ ├── load_two_bytes_compare_unsigned.txt │ │ │ ├── many_stores_relative_to_same_register.txt │ │ │ ├── memcpy_block.txt │ │ │ ├── rotate.txt │ │ │ ├── stack_accesses.txt │ │ │ ├── store_to_same_memory_address.txt │ │ │ └── unaligned_memcpy.txt │ │ ├── pseudo_instructions/ │ │ │ ├── beqz.txt │ │ │ ├── bgez.txt │ │ │ ├── bgtz.txt │ │ │ ├── blez.txt │ │ │ ├── bltz.txt │ │ │ ├── bnez.txt │ │ │ ├── j.txt │ │ │ ├── jr.txt │ │ │ ├── load_immediate.txt │ │ │ ├── mv.txt │ │ │ ├── neg.txt │ │ │ ├── not.txt │ │ │ ├── ret.txt │ │ │ ├── seqz.txt │ │ │ ├── sgtz.txt │ │ │ ├── sltz.txt │ │ │ └── snez.txt │ │ ├── single_instructions/ │ │ │ ├── single_add_1.txt │ │ │ ├── single_and_0.txt │ │ │ ├── single_beq.txt │ │ │ ├── single_bge.txt │ │ │ ├── single_bgeu.txt │ │ │ ├── single_blt.txt │ │ │ ├── single_bltu.txt │ │ │ ├── single_bne.txt │ │ │ ├── single_div.txt │ │ │ ├── single_divu.txt │ │ │ ├── single_loadb.txt │ │ │ ├── single_loadb_imm0.txt │ │ │ ├── single_loadb_x0.txt │ │ │ ├── single_loadbu.txt │ │ │ ├── single_loadh.txt │ │ 
│ ├── single_loadhu.txt │ │ │ ├── single_loadw.txt │ │ │ ├── single_mul.txt │ │ │ ├── single_rem.txt │ │ │ ├── single_remu.txt │ │ │ ├── single_sll.txt │ │ │ ├── single_sll_by_8.txt │ │ │ ├── single_sra.txt │ │ │ ├── single_srl.txt │ │ │ ├── single_storeb.txt │ │ │ ├── single_storeh.txt │ │ │ ├── single_storew.txt │ │ │ ├── single_sub.txt │ │ │ └── single_xor.txt │ │ └── superblocks/ │ │ ├── beq0_fallthrough.txt │ │ ├── beq0_jump.txt │ │ ├── beq_fallthrough.txt │ │ ├── beq_jump.txt │ │ └── many_blocks.txt │ ├── common/ │ │ └── mod.rs │ ├── keccak_apc_pre_opt.cbor │ ├── machine_extraction.rs │ └── openvm_constraints.txt ├── riscv-elf/ │ ├── Cargo.toml │ └── src/ │ ├── bin/ │ │ └── elf-labels.rs │ ├── debug_info.rs │ ├── lib.rs │ └── rv64.rs ├── riscv-types/ │ ├── Cargo.toml │ └── src/ │ └── lib.rs ├── rust-toolchain.toml ├── scripts/ │ ├── analyze_nightly.py │ └── update-dep.sh └── syscalls/ ├── Cargo.toml └── src/ └── lib.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .config/nextest.toml ================================================ # Profiles to fail after a timeout, but continue with the other tests [profile.quick-10] slow-timeout = { period = "10s", terminate-after = 1 } fail-fast = false [profile.quick-30] slow-timeout = { period = "30s", terminate-after = 1 } fail-fast = false [profile.quick-60] slow-timeout = { period = "60s", terminate-after = 1 } fail-fast = false ================================================ FILE: .gitattributes ================================================ **/*.asm linguist-language=Rust **/*.pil linguist-language=Rust ================================================ FILE: .github/actions/init-testing-instance/action.yml ================================================ name: "Init testing instance" description: "Initialises a testing instance with all required tools and fetches the precomputed tests archive 
named `tests_archive_cpu`" runs: using: "composite" steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Download build artifacts (CPU) uses: actions/download-artifact@v4 with: name: tests_archive_cpu - name: Install Rust toolchain nightly-2025-05-14 (with clippy and rustfmt) shell: bash run: rustup toolchain install nightly-2025-05-14 --component clippy,rustfmt,rust-src - name: Install riscv target shell: bash run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2025-10-01 - name: Install test dependencies shell: bash run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install Rust deps shell: bash run: rustup install nightly-2025-10-01 --component rust-src - name: Install Rust deps shell: bash run: rustup install nightly-2025-02-14 --component rust-src - name: Install OpenVM guest toolchain (nightly-2025-08-02) shell: bash run: | rustup toolchain install nightly-2025-08-02 rustup component add rust-src --toolchain nightly-2025-08-02 - uses: taiki-e/install-action@nextest ================================================ FILE: .github/actions/init-testing-instance-gpu/action.yml ================================================ name: "Init testing instance (GPU)" description: "Initialises a testing instance with all required tools and fetches the precomputed tests archive named `tests_archive_gpu`" runs: using: "composite" steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Download build artifacts (GPU) uses: actions/download-artifact@v4 with: name: tests_archive_gpu - name: Install Rust toolchain nightly-2025-05-14 (with clippy and rustfmt) shell: bash run: rustup toolchain install nightly-2025-05-14 --component clippy,rustfmt,rust-src - name: Install riscv target shell: bash run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2025-10-01 # TODO: runner on our GPU server has no sudo access, so we manually installed these; uncomment these once we have 
proper runners # - name: Install test dependencies # shell: bash # run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Install Rust deps shell: bash run: rustup install nightly-2025-10-01 --component rust-src - name: Install Rust deps shell: bash run: rustup install nightly-2025-02-14 --component rust-src - name: Install OpenVM guest toolchain (nightly-2025-08-02) shell: bash run: | rustup toolchain install nightly-2025-08-02 rustup component add rust-src --toolchain nightly-2025-08-02 - uses: taiki-e/install-action@nextest ================================================ FILE: .github/actions/patch-openvm-reth-benchmark/action.yml ================================================ name: "Patch openvm-reth-benchmark" description: "Checks out powdr-labs/openvm-reth-benchmark at a fixed ref and patches it to use local powdr crates" runs: using: "composite" steps: - name: Checkout openvm-reth-benchmark uses: actions/checkout@v4 with: repository: powdr-labs/openvm-reth-benchmark # Set once here — no inputs required elsewhere # Should always point to the latest main commit ref: 4a697fec23cb00849039f0bcaab5432929e05b38 path: openvm-reth-benchmark - name: Patch openvm-reth-benchmark to use local powdr shell: bash run: | cd openvm-reth-benchmark mkdir -p .cargo cat <<'EOF' > .cargo/config.toml [patch."https://github.com/powdr-labs/powdr.git"] powdr-openvm-riscv = { path = "../openvm-riscv" } powdr-openvm = { path = "../openvm" } powdr-riscv-elf = { path = "../riscv-elf" } powdr-number = { path = "../number" } powdr-autoprecompiles = { path = "../autoprecompiles" } powdr-openvm-riscv-hints-circuit = { path = "../openvm-riscv/extensions/hints-circuit" } EOF ================================================ FILE: .github/runner/Dockerfile ================================================ # # Runner for powdr github actions. # We don't automate runner token generation yet. 
This image should be used as follows: # - generate a runner token in github (valid for ~1h) # - build the docker image passing the token as argument: # docker buildx build -t github-runner --build-arg TOKEN=THE_GENERATED_TOKEN . # - this will create an image already registered it with github # - the container will start the runner (./run.sh) by default. # this base image was taken from the Dockerfile in the github runner repo FROM mcr.microsoft.com/dotnet/runtime-deps:6.0-jammy AS build ARG RUNNER_VERSION=2.319.1 RUN apt-get update && apt install -y curl \ sudo \ libicu70 \ liblttng-ust1 \ libkrb5-3 \ zlib1g \ libssl3 \ git \ build-essential \ clang-15 \ nlohmann-json3-dev \ libpqxx-dev \ nasm \ libgmp-dev \ uuid-dev \ zstd RUN adduser --disabled-password --uid 1001 runner \ && usermod -aG sudo runner \ && echo "%sudo ALL=(ALL:ALL) NOPASSWD:ALL" > /etc/sudoers \ && echo "Defaults env_keep += \"DEBIAN_FRONTEND\"" >> /etc/sudoers USER runner WORKDIR /home/runner RUN curl -f -L -o runner.tar.gz https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz \ && tar xzf ./runner.tar.gz \ && rm runner.tar.gz RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s - -y ARG TOKEN RUN test -n "$TOKEN" || (echo "must set github runner TOKEN: --build-arg TOKEN=XXX" && false) RUN ./config.sh --name arch-server --work work --replace --url https://github.com/powdr-labs/powdr --token ${TOKEN} # anything that should be in the PATH of the runner must be setup here ENV PATH="/home/runner/.cargo/bin:$PATH" CMD ["./run.sh"] ================================================ FILE: .github/workflows/build-cache.yml ================================================ name: Generate rust cache for PR builds on: workflow_dispatch: schedule: - cron: '0 2 * * *' # run at 2 AM UTC env: CARGO_TERM_COLOR: always jobs: build: runs-on: warp-ubuntu-2404-x64-4x steps: - uses: actions/checkout@v4 with: submodules: recursive - name: 
Save date of cache build run: mkdir target && date -R -u > target/cache-build-date.txt - name: Save commit hash of cache build run: git rev-parse HEAD > target/cache-commit-hash.txt ##### The block below is shared between cache build and PR build workflows ##### - name: Install Rust toolchain nightly-2025-10-01 (with clippy and rustfmt) run: rustup toolchain install nightly-2025-10-01 --component clippy,rustfmt - name: Install Rust toolchain run: rustup toolchain install nightly-2025-02-14 --component rust-src - name: Install Rust toolchain 1.90 (stable) run: rustup toolchain install 1.90 - name: Set cargo to perform shallow clones run: echo "CARGO_NET_GIT_FETCH_WITH_CLI=true" >> $GITHUB_ENV - name: Format run: cargo fmt --all --check --verbose - name: Cargo check with Rust 1.90 (default features) run: cargo +1.90 check --all-targets - name: Lint no default features run: cargo clippy --all --all-targets --no-default-features --profile pr-tests --verbose -- -D warnings - name: Build run: cargo build --all-targets --features metrics --all --profile pr-tests --verbose ############################################################################### - name: Delete the old cache uses: WarpBuilds/cache@v1 with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ Cargo.lock key: ${{ runner.os }}-cargo-pr-tests delete-cache: true - name: ⚡ Save rust cache uses: WarpBuilds/cache/save@v1 with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ Cargo.lock key: ${{ runner.os }}-cargo-pr-tests ================================================ FILE: .github/workflows/dead-links.yml ================================================ name: Check markdown links on: [pull_request, merge_group] jobs: markdown-link-check: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 - uses: gaurav-nelson/github-action-markdown-link-check@v1 with: use-quiet-mode: 'no' use-verbose-mode: 'yes' 
================================================ FILE: .github/workflows/nightly-analyze.yml ================================================ name: Nightly Regression Analysis on: workflow_dispatch: workflow_run: workflows: ["Nightly tests"] types: - completed jobs: analyze: runs-on: ubuntu-latest # Only run if nightly tests completed successfully or failed (not skipped/cancelled) if: >- ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' || github.event.workflow_run.conclusion == 'failure' }} steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.12' - name: Install dependencies run: pip install pandas - name: Run regression analysis id: analysis run: | # Run analysis and capture stdout (report) separately from stderr (logs) set +e python ./scripts/analyze_nightly.py --regression-threshold 2 > analysis_report.md EXIT_CODE=$? set -e # Set outputs for later steps echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT # Save report as output (using delimiter for multiline) { echo "report<> $GITHUB_OUTPUT # Print report to logs as well cat analysis_report.md - name: Generate job summary run: cat analysis_report.md >> $GITHUB_STEP_SUMMARY - name: Check for regressions if: ${{ steps.analysis.outputs.exit_code == '1' }} run: echo "::warning::Performance regressions detected! See job summary for details." - name: Check for errors if: ${{ steps.analysis.outputs.exit_code == '2' }} run: echo "::warning::Errors occurred during analysis. See job summary for details." 
- name: Send report to Matrix uses: fadenb/matrix-chat-message@v0.0.6 with: homeserver: ${{ secrets.MATRIX_HOMESERVER }} token: ${{ secrets.MATRIX_ACCESS_TOKEN }} channel: ${{ secrets.MATRIX_ROOM_ID }} message: | ${{ steps.analysis.outputs.report }} [View workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) ================================================ FILE: .github/workflows/nightly-tests.yml ================================================ name: Nightly tests on: workflow_dispatch: schedule: - cron: "0 23 * * *" # run at 11pm UTC env: CARGO_TERM_COLOR: always RUSTFLAGS: "-C target-cpu=native" RUST_BACKTRACE: 1 JEMALLOC_SYS_WITH_MALLOC_CONF: "retain:true,background_thread:true,metadata_thp:always,dirty_decay_ms:10000,muzzy_decay_ms:10000,abort_conf:true" POWDR_OPENVM_SEGMENT_DELTA: 50000 jobs: bench: runs-on: warp-ubuntu-2404-x64-4x permissions: contents: write deployments: write pull-requests: write steps: - uses: actions/checkout@v4 with: submodules: recursive - name: ⚡ Restore rust cache id: cache uses: WarpBuilds/cache/restore@v1 with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ Cargo.lock key: ${{ runner.os }}-cargo-pr-tests - name: Install Rust toolchain nightly-2025-10-01 (with clippy and rustfmt) run: rustup toolchain install nightly-2025-10-01 --component clippy,rustfmt,rust-src - name: Install Rust toolchain 1.90 run: rustup toolchain install 1.90 - name: Install riscv target run: rustup target add riscv32imac-unknown-none-elf --toolchain nightly-2025-10-01 - name: Install test dependencies run: sudo apt-get update && sudo apt-get install -y binutils-riscv64-unknown-elf lld - name: Run benchmarks # we add `|| exit 1` to make sure the step fails if `cargo bench` fails run: cargo bench --workspace --features "metrics" -- --output-format bencher | tee output.txt || exit 1 - name: Store benchmark result uses: benchmark-action/github-action-benchmark@v1 with: name: Benchmarks 
tool: "cargo" output-file-path: output.txt github-token: ${{ secrets.GITHUB_TOKEN }} auto-push: true alert-threshold: "120%" comment-on-alert: true summary-always: true test_apc: runs-on: server-dev steps: - uses: actions/checkout@v4 with: submodules: recursive - name: ⚡ Cache rust uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-release-apc-${{ hashFiles('**/Cargo.toml') }} - name: Build run: cargo build --release -p powdr-openvm - name: Install cargo openvm # Rust 1.90 is needed by fresher versions of dependencies of cargo-openvm. run: | rustup toolchain install 1.90 cargo +1.90 install --git 'http://github.com/powdr-labs/openvm.git' --rev "v1.4.2-powdr-rc.4" --locked cargo-openvm - name: Setup python venv run: | python3 -m venv .venv source .venv/bin/activate pip install -r openvm-riscv/scripts/requirements.txt pip install -r autoprecompiles/scripts/requirements.txt - name: Remove old results if present run: | rm -rf results mkdir -p results - name: Run guest benchmarks run: | source .venv/bin/activate bash ./openvm-riscv/scripts/run_guest_benches.sh - name: Patch benchmark uses: ./.github/actions/patch-openvm-reth-benchmark - name: Run reth benchmark run: | source .venv/bin/activate cd openvm-reth-benchmark RES_DIR=reth mkdir -p $RES_DIR echo "export RPC_1=${{ secrets.RPC_1 }}" >> .env # prove with no APCs ./run.sh --apc 0 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with no APCs" mv metrics.json $RES_DIR/apc000.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc000.png $RES_DIR/apc000.json > $RES_DIR/trace_cells_apc000.txt # prove with 3 APCs ./run.sh --apc 3 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with 3 APCs" mv metrics.json $RES_DIR/apc003.json python ../openvm-riscv/scripts/plot_trace_cells.py -o 
$RES_DIR/trace_cells_apc003.png $RES_DIR/apc003.json > $RES_DIR/trace_cells_apc003.txt # prove with 10 APCs ./run.sh --apc 10 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with 10 APCs" mv metrics.json $RES_DIR/apc010.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc010.png $RES_DIR/apc010.json > $RES_DIR/trace_cells_apc010.txt # prove with 30 APCs ./run.sh --apc 30 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with 30 APCs" mv metrics.json $RES_DIR/apc030.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc030.png $RES_DIR/apc030.json > $RES_DIR/trace_cells_apc030.txt # prove with 100 APCs, recording mem usage psrecord --include-children --interval 1 --log $RES_DIR/psrecord.csv --log-format csv --plot $RES_DIR/psrecord.png "./run.sh --apc 100 --mode prove-stark" || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with 100 APCs" mv metrics.json $RES_DIR/apc100.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc100.png $RES_DIR/apc100.json > $RES_DIR/trace_cells_apc100.txt # The APC candidates would be the same for all runs, so just keep the last one mv apcs/apc_candidates.json $RES_DIR/apc_candidates.json python ../openvm-riscv/scripts/basic_metrics.py summary-table --csv $RES_DIR/apc000.json $RES_DIR/apc003.json $RES_DIR/apc010.json $RES_DIR/apc030.json $RES_DIR/apc100.json > $RES_DIR/basic_metrics.csv python ../openvm-riscv/scripts/basic_metrics.py plot $RES_DIR/apc000.json $RES_DIR/apc003.json $RES_DIR/apc010.json $RES_DIR/apc030.json $RES_DIR/apc100.json -o $RES_DIR/proof_time_breakdown.png python ../openvm-riscv/scripts/basic_metrics.py combine $RES_DIR/apc000.json $RES_DIR/apc003.json $RES_DIR/apc010.json $RES_DIR/apc030.json $RES_DIR/apc100.json > 
$RES_DIR/combined_metrics.json python ../autoprecompiles/scripts/plot_effectiveness.py $RES_DIR/apc_candidates.json --output $RES_DIR/effectiveness.png mv $RES_DIR ../results/ - name: Save revisions and run info run: | echo "openvm-reth-benchmark: $(git -C openvm-reth-benchmark rev-parse HEAD)" > results/run.txt echo "powdr: $(git rev-parse HEAD)" >> results/run.txt echo "run: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" >> results/run.txt - name: upload result artifacts uses: actions/upload-artifact@v4 with: name: bench-results path: | results/* - name: get the date/time id: date run: echo "value=$(date +'%Y-%m-%d-%H%M')" >> $GITHUB_OUTPUT - name: Generate bench results README run: | python3 ./openvm-riscv/scripts/generate_bench_results_readme.py \ ./results \ "${{ steps.date.outputs.value }}" \ --output ./results/readme.md - name: commit to bench results uses: peaceiris/actions-gh-pages@v4 with: personal_token: ${{ secrets.BENCH_RESULTS_TOKEN }} external_repository: powdr-labs/bench-results publish_dir: ./results destination_dir: results/${{ steps.date.outputs.value }}/ keep_files: true enable_jekyll: true test_apc_gpu: runs-on: [self-hosted, gpu-shared] steps: - uses: actions/checkout@v4 with: submodules: recursive - name: ⚡ Cache rust uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-release-apc-gpu-${{ hashFiles('**/Cargo.toml') }} - name: Install cargo openvm # Rust 1.90 is needed by fresher versions of dependencies of cargo-openvm. 
run: | rustup toolchain install 1.90 cargo +1.90 install --git 'http://github.com/powdr-labs/openvm.git' --rev "v1.4.2-powdr-rc.4" --locked cargo-openvm - name: Setup python venv run: | python3 -m venv .venv source .venv/bin/activate pip install -r openvm-riscv/scripts/requirements.txt pip install -r autoprecompiles/scripts/requirements.txt - name: Remove old results if present run: | rm -rf results mkdir -p results - name: Patch benchmark uses: ./.github/actions/patch-openvm-reth-benchmark - name: Run reth benchmark (GPU) run: | source .venv/bin/activate cd openvm-reth-benchmark RES_DIR=reth_gpu mkdir -p $RES_DIR echo "export RPC_1=${{ secrets.RPC_1 }}" >> .env # prove with no APCs ./run.sh --cuda --apc 0 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with no APCs" mv metrics.json $RES_DIR/apc000.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc000.png $RES_DIR/apc000.json > $RES_DIR/trace_cells_apc000.txt # prove with 10 APCs ./run.sh --cuda --apc 10 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with 10 APCs" mv metrics.json $RES_DIR/apc010.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc010.png $RES_DIR/apc010.json > $RES_DIR/trace_cells_apc010.txt # prove with 30 APCs ./run.sh --cuda --apc 30 --mode prove-stark || exit 1 # remove apc cache to not interfere with the next runs rm -rf apc-cache echo "Finished proving with 30 APCs" mv metrics.json $RES_DIR/apc030.json python ../openvm-riscv/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells_apc030.png $RES_DIR/apc030.json > $RES_DIR/trace_cells_apc030.txt # The APC candidates would be the same for all runs, so just keep the last one mv apcs/apc_candidates.json $RES_DIR/apc_candidates.json python ../openvm-riscv/scripts/basic_metrics.py summary-table --csv $RES_DIR/apc000.json 
$RES_DIR/apc010.json $RES_DIR/apc030.json > $RES_DIR/basic_metrics.csv python ../openvm-riscv/scripts/basic_metrics.py plot $RES_DIR/apc000.json $RES_DIR/apc010.json $RES_DIR/apc030.json -o $RES_DIR/proof_time_breakdown.png python ../openvm-riscv/scripts/basic_metrics.py combine $RES_DIR/apc000.json $RES_DIR/apc010.json $RES_DIR/apc030.json > $RES_DIR/combined_metrics.json python ../autoprecompiles/scripts/plot_effectiveness.py $RES_DIR/apc_candidates.json --output $RES_DIR/effectiveness.png mv $RES_DIR ../results/ - name: Save revisions and run info run: | echo "openvm-reth-benchmark: $(git -C openvm-reth-benchmark rev-parse HEAD)" > results/run.txt echo "powdr: $(git rev-parse HEAD)" >> results/run.txt echo "run: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" >> results/run.txt - name: upload result artifacts uses: actions/upload-artifact@v4 with: name: bench-results-gpu path: | results/* - name: get the date/time id: date run: echo "value=$(date +'%Y-%m-%d-%H%M')" >> $GITHUB_OUTPUT - name: Generate bench results README run: | python3 ./openvm-riscv/scripts/generate_bench_results_readme.py \ ./results \ "${{ steps.date.outputs.value }}-gpu" \ --output ./results/readme.md - name: commit to bench results uses: peaceiris/actions-gh-pages@v4 with: personal_token: ${{ secrets.BENCH_RESULTS_TOKEN }} external_repository: powdr-labs/bench-results publish_dir: ./results destination_dir: results/${{ steps.date.outputs.value }}-gpu/ keep_files: true enable_jekyll: true ================================================ FILE: .github/workflows/post-merge-tests.yml ================================================ name: Post-merge APC tests on: workflow_dispatch: push: branches: - main paths: - "**.rs" - "**.toml" env: CARGO_TERM_COLOR: always RUSTFLAGS: "-C target-cpu=native" RUST_BACKTRACE: 1 JEMALLOC_SYS_WITH_MALLOC_CONF: "retain:true,background_thread:true,metadata_thp:always,dirty_decay_ms:10000,muzzy_decay_ms:10000,abort_conf:true" 
POWDR_OPENVM_SEGMENT_DELTA: 50000 jobs: test_guests_apc: runs-on: server-dev steps: - uses: actions/checkout@v4 with: submodules: recursive - name: ⚡ Cache rust uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-release-apc-${{ hashFiles('**/Cargo.toml') }} - name: Build run: cargo build --release -p powdr-openvm - name: Install cargo openvm # Rust 1.90 is needed by fresher versions of dependencies of cargo-openvm. run: | rustup toolchain install 1.90 cargo +1.90 install --git 'http://github.com/powdr-labs/openvm.git' --rev "v1.4.2-powdr-rc.4" --locked cargo-openvm - name: Run keccak with 100 APCs run: /usr/bin/time -v cargo run --bin powdr_openvm_riscv -r prove guest-keccak --input 10000 --autoprecompiles 100 --recursion - name: Run ECC with 100 APCs run: /usr/bin/time -v cargo run --bin powdr_openvm_riscv -r prove guest-ecc-powdr-affine-hint --input 20 --autoprecompiles 100 --recursion - name: Run ecrecover with 100 APCs run: /usr/bin/time -v cargo run --bin powdr_openvm_riscv -r prove guest-ecrecover --input 20 --autoprecompiles 100 --recursion - name: Patch benchmark uses: ./.github/actions/patch-openvm-reth-benchmark - name: Run reth benchmark run: | cd openvm-reth-benchmark RES_DIR=reth mkdir -p $RES_DIR echo "export RPC_1=${{ secrets.RPC_1 }}" >> .env # prove with 100 APCs /usr/bin/time -v ./run.sh --apc 100 --mode prove-stark || exit 1 echo "Finished proving with 100 APCs" - name: Save revisions and run info run: | echo "openvm-reth-benchmark: $(git -C openvm-reth-benchmark rev-parse HEAD)" > run.txt echo "powdr: $(git rev-parse HEAD)" >> run.txt echo "run: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" >> run.txt - name: upload artifacts uses: actions/upload-artifact@v4 with: name: bench-results path: | run.txt ================================================ FILE: .github/workflows/pr-tests-with-secrets.yml ================================================ name: PR tests (with 
secrets) # This workflow uses pull_request_target to allow external PRs to access secrets # after a maintainer approves the workflow run # # SECURITY NOTE: This workflow intentionally checks out untrusted code from PRs # to run tests with secrets. This is safe because: # 1. GitHub requires maintainer approval before running for external contributors # 2. The workflow code itself (this file) is controlled and runs from base branch # 3. We only run predefined build/test commands, not arbitrary PR code # 4. Cache poisoning risk is acceptable for these specific test jobs on: # also allow this to be run manually (so we can test changes to the workflow in a branch) workflow_dispatch: pull_request_target: types: [opened, synchronize, reopened] # cancel any previous running workflows for the same branch concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number }} cancel-in-progress: true # Minimal permissions for security permissions: contents: read env: CARGO_TERM_COLOR: always POWDR_OPENVM_SEGMENT_DELTA: 50000 jobs: test_apc_reth_compilation: runs-on: warp-ubuntu-2404-x64-8x steps: # IMPORTANT: Checkout the PR head, not the base branch - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} submodules: recursive - name: ⚡ Cache rust uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.toml') }} - name: Build run: cargo build --release -p powdr-openvm - name: Install cargo openvm # Rust 1.90 is needed by fresher versions of dependencies of cargo-openvm. 
run: | rustup toolchain install 1.90 cargo +1.90 install --git 'https://github.com/powdr-labs/openvm.git' --rev "v1.4.2-powdr-rc.4" --locked cargo-openvm - name: Patch benchmark uses: ./.github/actions/patch-openvm-reth-benchmark - name: Run small execution test with APCs run: | cd openvm-reth-benchmark echo "export RPC_1=${{ secrets.RPC_1 }}" >> .env PGO_TYPE="instruction" /usr/bin/time -v ./run.sh --apc 10 --mode compile # Check that reth commit is on main. # Do that after the actual test so that the step above passes when checking that a # reth PR commit works with a powdr PR. - name: Verify openvm-reth-benchmark ref is on main shell: bash run: | cd openvm-reth-benchmark if [ "$(git rev-parse --is-shallow-repository)" = "true" ]; then git fetch --quiet --unshallow origin main else git fetch --quiet origin main fi if ! git merge-base --is-ancestor HEAD origin/main; then echo "Pinned ref is not in origin/main history." echo "HEAD: $(git rev-parse HEAD)" echo "origin/main: $(git rev-parse origin/main)" exit 1 fi test_apc_reth_app_proof: runs-on: warp-ubuntu-2404-x64-32x steps: # IMPORTANT: Checkout the PR head, not the base branch - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} submodules: recursive - name: ⚡ Cache rust uses: actions/cache@v4 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-release-apc-reth-app-proof-${{ hashFiles('**/Cargo.toml') }} - name: Build run: cargo build --release -p powdr-openvm - name: Install cargo openvm # Rust 1.90 is needed by fresher versions of dependencies of cargo-openvm.
run: | rustup toolchain install 1.90 cargo +1.90 install --git 'https://github.com/powdr-labs/openvm.git' --rev "v1.4.2-powdr-rc.4" --locked cargo-openvm - name: Setup python venv run: | python3 -m venv .venv source .venv/bin/activate pip install -r openvm-riscv/scripts/requirements.txt pip install -r autoprecompiles/scripts/requirements.txt - name: Patch benchmark uses: ./.github/actions/patch-openvm-reth-benchmark - name: Run reth benchmark run: | source .venv/bin/activate cd openvm-reth-benchmark RES_DIR=reth mkdir -p $RES_DIR echo "export RPC_1=${{ secrets.RPC_1 }}" >> .env # prove with 3 APCs APC=3 ./run.sh --mode prove-app || exit 1 echo "Finished proving with 3 APCs" ================================================ FILE: .github/workflows/pr-tests.yml ================================================ name: PR tests on: workflow_dispatch: pull_request: types: [opened, synchronize, reopened, ready_for_review] merge_group: push: branches: - main # cancel any previous running workflows for the same branch concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} env: CARGO_TERM_COLOR: always POWDR_OPENVM_SEGMENT_DELTA: 50000 jobs: build_cpu: runs-on: warp-ubuntu-2404-x64-8x steps: - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: recursive - name: ⚡ Restore rust cache id: cache uses: WarpBuilds/cache/restore@v1 with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ Cargo.lock key: ${{ runner.os }}-cargo-pr-tests - name: Date of the restored cache run: cat target/cache-build-date.txt continue-on-error: true - name: Check out cache commit state and update mtime accordingly.
run: git checkout "$(cat target/cache-commit-hash.txt || echo 'f02fd626e2bb9e46a22ea1cda96b4feb5c6bda43')" && git ls-files -z | xargs -0 -n1 touch -d "Fri, 18 Apr 2025 03:30:58 +0000" && git checkout HEAD@{1} ##### The block below is shared between cache build and PR build workflows ##### - name: Install Rust toolchain nightly-2025-10-01 (with clippy and rustfmt) run: rustup toolchain install nightly-2025-10-01 --component clippy,rustfmt - name: Install Rust toolchain 1.90 (stable) run: rustup toolchain install 1.90 - name: Set cargo to perform shallow clones run: echo "CARGO_NET_GIT_FETCH_WITH_CLI=true" >> $GITHUB_ENV - name: Format run: cargo fmt --all --check --verbose - name: Cargo check with Rust 1.90 (default features) run: cargo +1.90 check --all-targets - name: Lint no default features run: cargo clippy --all --all-targets --features metrics --profile pr-tests --verbose -- -D warnings - name: Build (CPU) run: cargo build --all-targets --features metrics --all --profile pr-tests --verbose ############################################################################### - uses: taiki-e/install-action@nextest - name: Create tests archive (CPU) run: cargo nextest archive --archive-file tests_cpu.tar.zst --cargo-profile pr-tests --workspace --no-default-features - name: Upload build artifacts (CPU) uses: actions/upload-artifact@v4 with: name: tests_archive_cpu path: | tests_cpu.tar.zst test_quick_cpu: needs: build_cpu runs-on: ubuntu-24.04 strategy: matrix: test: - "1" - "2" - "3" - "4" - "5" - "6" - "7" steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Init testing instance uses: ./.github/actions/init-testing-instance - name: Run default tests run: cargo nextest run --archive-file tests_cpu.tar.zst --workspace-remap . 
--verbose --partition count:"${{ matrix.test }}"/7 --no-tests=warn test_medium_cpu: needs: build_cpu runs-on: warp-ubuntu-2404-x64-16x strategy: matrix: test: - "1" - "2" - "3" - "4" steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Init testing instance uses: ./.github/actions/init-testing-instance - name: Run medium tests (ignored tests except large ones) run: cargo nextest run --archive-file tests_cpu.tar.zst --workspace-remap . --verbose --partition count:"${{ matrix.test }}"/4 --test-threads=4 -E 'not (test(_large))' --run-ignored only --no-tests=warn test_large_cpu: needs: build_cpu runs-on: warp-ubuntu-2404-x64-32x steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Init testing instance uses: ./.github/actions/init-testing-instance - name: Run large tests run: cargo nextest run --archive-file tests_cpu.tar.zst --workspace-remap . --verbose -E 'test(_large)' --run-ignored only --no-tests=warn udeps_cpu: runs-on: ubuntu-22.04 steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: toolchain: nightly override: true - name: Install cargo-udeps run: cargo install cargo-udeps --locked - name: Run cargo-udeps (CPU) run: cargo udeps --all-targets # NOTE: test_apc_reth_compilation has been moved to pr-tests-with-secrets.yml # This job requires secrets.RPC_1 and uses pull_request_target to work with external PRs build_gpu: if: github.event.pull_request.draft != true runs-on: [self-hosted, gpu-shared] timeout-minutes: 10 steps: - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: recursive - name: Install Rust toolchain nightly-2025-10-01 (with clippy and rustfmt) run: rustup toolchain install nightly-2025-10-01 --component clippy,rustfmt - name: Install Rust toolchain 1.90 (stable) run: rustup toolchain install 1.90 - name: Set cargo to perform shallow clones run: echo "CARGO_NET_GIT_FETCH_WITH_CLI=true" >> $GITHUB_ENV - name: Format run: cargo
fmt --all --check --verbose - name: Cargo check with Rust 1.90 (default features) run: cargo +1.90 check --all-targets - name: Lint with GPU features (cuda,metrics,aot) run: cargo clippy --all --all-targets --features cuda,metrics,aot --profile pr-tests --verbose -- -D warnings - name: Build (GPU) run: cargo build --all-targets --features cuda,metrics,aot --all --profile pr-tests --verbose - uses: taiki-e/install-action@nextest - name: Create tests archive (GPU, features=cuda) run: cargo nextest archive --archive-file tests_gpu.tar.zst --cargo-profile pr-tests --workspace --package powdr-openvm-riscv --features cuda - name: Upload build artifacts (GPU) uses: actions/upload-artifact@v4 with: name: tests_archive_gpu path: | tests_gpu.tar.zst test_quick_gpu: if: github.event.pull_request.draft != true needs: build_gpu runs-on: [self-hosted, gpu-shared] timeout-minutes: 30 # TODO: we only have one runner on our GPU server, so can't partition yet; uncomment these once we have proper runners # strategy: # matrix: # test: # - "1" # - "2" # - "3" # - "4" # - "5" # - "6" # - "7" steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Init testing instance (GPU) uses: ./.github/actions/init-testing-instance-gpu - name: Run quick GPU tests from powdr-openvm-riscv only run: cargo nextest run --archive-file tests_gpu.tar.zst --workspace-remap . --verbose --no-tests=warn # run: cargo nextest run --archive-file tests_gpu.tar.zst --workspace-remap .
--verbose --partition count:"${{ matrix.test }}"/7 --no-tests=warn # NOTE: test_apc_reth_app_proof has been moved to pr-tests-with-secrets.yml # This job requires secrets.RPC_1 and uses pull_request_target to work with external PRs ================================================ FILE: .gitignore ================================================ # Generated by Cargo # will have compiled files and executables /target/ # Cargo configuration /.cargo/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk cargo_target/ riscv/tests/riscv_data/**/target ================================================ FILE: CLAUDE.md ================================================ # CLAUDE.md This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. ## Project Overview powdr is a zkVM enhancement toolkit that provides autoprecompiles (automated synthesis of guest-specific precompiles) and a constraint solver. The codebase is tightly integrated with OpenVM and stark-backend from powdr-labs forks, providing autoprecompiles for the RISC-V zkVM OpenVM. ## Build Commands ```bash # Build the workspace (CPU) cargo build --all-targets --features metrics # Build with GPU support cargo build --all-targets --features cuda,metrics # Check compilation cargo check --all-targets # Format code cargo fmt --all # Lint cargo clippy --all --all-targets --features metrics -- -D warnings ``` ## Testing Always use `--release` for test runs. Use the `quick-10` profile as the default — it times out tests after 10s and continues. Check which tests timed out; only re-run them individually (without the quick profile) if you have good reason to believe your diff could affect them. All tests are run on CI anyway. 
```bash # Run all tests (default, with 10s timeout per test) cargo nextest run --release --profile quick-10 # Run a single test cargo nextest run --release <test_name> # Run ignored (longer) tests cargo nextest run --release --run-ignored only # Run only large tests cargo nextest run --release -E 'test(_large)' --run-ignored only # Run tests in specific package cargo nextest run --release -p powdr-openvm # Available quick profiles (timeout per test, slow tests deprioritized): # --profile quick-10 (10s timeout, good default) # --profile quick-30 (30s timeout) # --profile quick-60 (60s timeout) ``` ## CLI Usage The main CLI is `powdr_openvm_riscv` (in `cli-openvm-riscv/`): ```bash # Compile a guest program with autoprecompiles cargo run -p cli-openvm-riscv -- compile guest-keccak --autoprecompiles 10 --pgo instruction --input 100 # Execute a compiled program cargo run -p cli-openvm-riscv -- execute guest-keccak --autoprecompiles 10 --input 100 # Prove (generate ZK proof) cargo run -p cli-openvm-riscv -- prove guest-keccak --autoprecompiles 1 --input 10 # Mock prove (debug mode, verifies constraints without full proof) cargo run -p cli-openvm-riscv -- prove guest-keccak --mock --autoprecompiles 1 --input 10 ``` ## Architecture ### Core Crates - **autoprecompiles** (`autoprecompiles/`): The main precompile synthesis engine. Analyzes basic blocks of agnostic assembly instructions and synthesizes optimized circuits (APCs - Autoprecompiles). Key modules: - `optimizer.rs`: Constraint optimization pipeline - `constraint_optimizer.rs`: Eliminates redundant constraints - `symbolic_machine_generator.rs`: Converts instruction sequences to symbolic machines - `pgo/`: Profile-guided optimization for APC selection
Provides: - `grouped_expression.rs`: Expression representation for efficient manipulation - `indexed_constraint_system.rs`: Efficient constraint system indexing - `range_constraint.rs`: Range analysis for variables - `inliner.rs`: Constraint inlining with degree bounds - **openvm** (`openvm/`): OpenVM integration layer. Connects powdr optimizations to the OpenVM zkVM: - `customize_exe.rs`: Modifies OpenVM executables to use APCs - `powdr_extension/`: OpenVM circuit extension for APCs - `trace_generation.rs`: Generates execution traces for proving ### Supporting Crates - **expression** (`expression/`): Core algebraic expression types (`AlgebraicExpression`, operators) - **number** (`number/`): Field element abstractions - **riscv-elf** (`riscv-elf/`): ELF file parsing for RISC-V binaries - **cli-openvm** (`cli-openvm/`): Command-line interface ### Guest Programs Example guest programs in `openvm/guest-*` directories (keccak, sha256, ecc, pairing, etc.) are used for testing and benchmarking. ## Key Concepts - **APC (Autoprecompile)**: An optimized circuit for a basic block of assembly instructions (often RISC-V) - **PGO (Profile-Guided Optimization)**: Uses execution profiling to select which basic blocks to optimize - `PgoConfig::Cell`: Optimizes based on total cell count savings - `PgoConfig::Instruction`: Optimizes based on instruction execution frequency - **Symbolic Machine**: Intermediate representation of constraints and bus interactions - **Bus Interactions**: Communication between different chips/machines in the OpenVM architecture ## Coding Guidelines ### Coding Style - Write idiomatic Rust code. Follow Rust conventions and best practices, and keep the style similar to existing code in the repository. - Try to minimize code, reusing existing functions and modules where possible. - Keep diffs small and focused. Avoid unrelated changes, unnecessary refactoring, or adding comments to unchanged code. 
- Use builder pattern with `with_*` methods for structs with optional configuration. ### Before Returning to User Always run these checks before claiming work is complete: 1. Format code 2. Check clippy 3. Run relevant tests and / or end-to-end tests using the CLI ### Git Workflow - Use `git push origin ` - Never use `git add .` - explicitly add modified files only ### PR Workflow Use the GitHub CLI to interact with GitHub, for example: - Create PR (always use --draft): `gh pr create --repo https://github.com/powdr-labs/powdr --base main --draft --title "..." --body "..."` - Check CI status: `gh pr checks --repo https://github.com/powdr-labs/powdr ` - View PR comments: `gh pr view --repo https://github.com/powdr-labs/powdr --comments` - View review comments on code: `gh api repos/powdr-labs/powdr/pulls//comments` ================================================ FILE: Cargo.toml ================================================ [workspace] resolver = "2" members = [ "number", "constraint-solver", "expression", "riscv-elf", "riscv-types", "isa-utils", "syscalls", "autoprecompiles", "openvm", "openvm-bus-interaction-handler", "openvm-riscv", "cli-openvm-riscv", "openvm-riscv/extensions/hints-guest", "openvm-riscv/extensions/hints-transpiler", "openvm-riscv/extensions/hints-circuit", ] exclude = ["riscv-runtime"] [workspace.package] version = "0.1.4" edition = "2021" license = "MIT" homepage = "https://powdr.org" repository = "https://github.com/powdr-labs/powdr" [workspace.dependencies] # workspace crates powdr-constraint-solver = { path = "./constraint-solver", version = "0.1.4" } powdr-isa-utils = { path = "./isa-utils", version = "0.1.4" } powdr-expression = { path = "./expression", version = "0.1.4" } powdr-number = { path = "./number", version = "0.1.4" } powdr-riscv-elf = { path = "./riscv-elf", version = "0.1.4" } powdr-riscv-types = { path = "./riscv-types", version = "0.1.4" } powdr-syscalls = { path = "./syscalls", version = "0.1.4" } powdr-autoprecompiles = 
{ path = "./autoprecompiles", version = "0.1.4" } powdr-openvm-riscv = { path = "./openvm-riscv", version = "0.1.4" } powdr-openvm-bus-interaction-handler = { path = "./openvm-bus-interaction-handler", version = "0.1.4" } powdr-openvm = { path = "./openvm", version = "0.1.4" } powdr-openvm-riscv-hints-guest = { path = "./openvm-riscv/extensions/hints-guest", version = "0.1.4" } powdr-openvm-riscv-hints-transpiler = { path = "./openvm-riscv/extensions/hints-transpiler", version = "0.1.4" } powdr-openvm-riscv-hints-circuit = { path = "./openvm-riscv/extensions/hints-circuit", version = "0.1.4" } # openvm openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-build = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-rv32im-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-rv32im-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-rv32im-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false } openvm-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-circuit-derive = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-circuit-primitives = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-circuit-primitives-derive = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-instructions = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-instructions-derive = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-sdk = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false, features = [ 
"parallel", "jemalloc", "nightly-features", "evm-prove", ] } openvm-ecc-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-ecc-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-keccak256-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-keccak256-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-sha256-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-sha256-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-algebra-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-algebra-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-bigint-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-bigint-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-pairing-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-pairing-transpiler = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-native-circuit = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false } openvm-native-recursion = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false } openvm-platform = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-custom-insn = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } # stark-backend openvm-stark-sdk = { git = "https://github.com/powdr-labs/stark-backend.git", rev = "v1.2.2-powdr-2026-03-20", default-features = false, features = [ "parallel", "jemalloc", 
"nightly-features", ] } openvm-stark-backend = { git = "https://github.com/powdr-labs/stark-backend.git", rev = "v1.2.2-powdr-2026-03-20", default-features = false, features = [ "parallel", "jemalloc", ] } openvm-cuda-backend = { git = "https://github.com/powdr-labs/stark-backend.git", rev = "v1.2.2-powdr-2026-03-20", default-features = false } openvm-cuda-builder = { git = "https://github.com/powdr-labs/stark-backend.git", rev = "v1.2.2-powdr-2026-03-20", default-features = false } openvm-cuda-common = { git = "https://github.com/powdr-labs/stark-backend.git", rev = "v1.2.2-powdr-2026-03-20", default-features = false } # external dependencies num-traits = "0.2.19" itertools = "0.14" derive_more = { version = "2", features = ["full"] } log = "0.4.27" serde = "1.0.228" tracing = "0.1.40" eyre = "0.6.12" serde_cbor = "0.11.2" metrics = "0.23.0" derivative = "2.2.0" serde_json = "^1.0.140" # dev dependencies test-log = "0.2.19" pretty_assertions = "1.4.1" env_logger = "0.11.8" [profile.pr-tests] inherits = "dev" opt-level = 3 debug = "line-tables-only" debug-assertions = true overflow-checks = true panic = 'unwind' incremental = true # This is true because target is cached codegen-units = 256 [profile.release-with-debug] inherits = "release" debug = true [workspace.lints.clippy] print_stdout = "deny" uninlined_format_args = "deny" iter_over_hash_type = "deny" # Uncomment both patches below for local stark-backend and openvm. # The local openvm also needs to have stark-backend patched so all types match. 
# [patch."https://github.com/powdr-labs/stark-backend.git"] # openvm-stark-sdk = { path = "../stark-backend/crates/stark-sdk", default-features = false } # openvm-stark-backend = { path = "../stark-backend/crates/stark-backend", default-features = false } # openvm-cuda-backend = { path = "../stark-backend/crates/cuda-backend", default-features = false } # openvm-cuda-builder = { path = "../stark-backend/crates/cuda-builder", default-features = false } # openvm-cuda-common = { path = "../stark-backend/crates/cuda-common", default-features = false } # [patch."https://github.com/powdr-labs/openvm.git"] # openvm = { path = "../openvm/crates/toolchain/openvm" } # openvm-build = { path = "../openvm/crates/toolchain/build" } # openvm-rv32im-circuit = { path = "../openvm/extensions/rv32im/circuit/" } # openvm-rv32im-transpiler = { path = "../openvm/extensions/rv32im/transpiler" } # openvm-rv32im-guest = { path = "../openvm/extensions/rv32im/guest" } # openvm-transpiler = { path = "../openvm/crates/toolchain/transpiler" } # openvm-circuit = { path = "../openvm/crates/vm" } # openvm-circuit-derive = { path = "../openvm/crates/vm/derive" } # openvm-circuit-primitives = { path = "../openvm/crates/circuits/primitives" } # openvm-circuit-primitives-derive = { path = "../openvm/crates/circuits/primitives/derive" } # openvm-instructions = { path = "../openvm/crates/toolchain/instructions" } # openvm-instructions-derive = { path = "../openvm/crates/toolchain/instructions/derive" } # openvm-sdk = { path = "../openvm/crates/sdk" } # openvm-ecc-circuit = { path = "../openvm/extensions/ecc/circuit" } # openvm-ecc-transpiler = { path = "../openvm/extensions/ecc/transpiler" } # openvm-keccak256-circuit = { path = "../openvm/extensions/keccak256/circuit" } # openvm-keccak256-transpiler = { path = "../openvm/extensions/keccak256/transpiler" } # openvm-sha256-circuit = { path = "../openvm/extensions/sha256/circuit" } # openvm-sha256-transpiler = { path = 
"../openvm/extensions/sha256/transpiler" } # openvm-algebra-circuit = { path = "../openvm/extensions/algebra/circuit" } # openvm-algebra-transpiler = { path = "../openvm/extensions/algebra/transpiler" } # openvm-bigint-circuit = { path = "../openvm/extensions/bigint/circuit" } # openvm-bigint-transpiler = { path = "../openvm/extensions/bigint/transpiler" } # openvm-pairing-circuit = { path = "../openvm/extensions/pairing/circuit" } # openvm-pairing-transpiler = { path = "../openvm/extensions/pairing/transpiler" } # openvm-native-circuit = { path = "../openvm/extensions/native/circuit" } # openvm-native-recursion = { path = "../openvm/extensions/native/recursion" } # openvm-platform = { path = "../openvm/crates/toolchain/platform" } # openvm-custom-insn = { path = "../openvm/crates/toolchain/custom_insn" } ================================================ FILE: LICENSE-APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: LICENSE-MIT ================================================ MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================

# powdr [![Matrix Chat](https://img.shields.io/badge/Matrix%20-chat-brightgreen?style=plastic&logo=matrix)](https://matrix.to/#/#powdr:matrix.org) [![Twitter Follow](https://img.shields.io/twitter/follow/powdr_labs?style=plastic&logo=twitter)](https://twitter.com/powdr_labs) > WARNING: This codebase is experimental and has not been audited. DO NOT USE FOR PRODUCTION! If you have any questions or want to contribute, feel free to write to us in our [Matrix Chat](https://matrix.to/#/#powdr:matrix.org). *powdr* provides state-of-the-art performance and security to zkVMs, enhancing them with compiler-based techniques including static analysis and formal verification. The main components are: - [Autoprecompiles](https://www.powdr.org/blog/auto-acc-circuits): automated synthesis of guest-specific precompiles. - Constraint Solver: compile-time solver used to detect potential optimizations and security issues. - powdr-OpenVM: powdr extensions for [OpenVM](https://github.com/openvm-org/openvm/). ## powdr-legacy The previous versions of powdr are now archived in the [powdr-legacy](https://github.com/powdr-labs/powdr-legacy) repository. It contains all previous crates regarding provers, powdr-asm, powdr-pil, powdrVM, stdlib circuits and RISC-V support. ### Project structure For an overview of the project structure, run: ``` cargo doc --workspace --no-deps --open ``` ## Contributing Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as below, without any additional terms or conditions. ## License This project is licensed under either of - [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0) ([`LICENSE-APACHE`](LICENSE-APACHE)) - [MIT license](https://opensource.org/licenses/MIT) ([`LICENSE-MIT`](LICENSE-MIT)) at your option. 
================================================ FILE: autoprecompile-analyzer/Claude.md ================================================ # APC Effectiveness Analyzer Single-page web app for visualizing Automatic Precompile (APC) candidate effectiveness in zkVM systems. Port of `plot_effectiveness.py` from powdr. ## Project Structure ``` index.html # SPA with embedded JS/CSS (~2000 lines) CLAUDE.md # This file ``` ## Data Format Current version: **Version 4**: Each APC is a *superblock* composed of one or more basic blocks. ```json { "version": 4, "apcs": [{ "execution_frequency": 50000, "original_blocks": [ { "start_pc": 12345, "instructions": ["instr1", "instr2"] }, { "start_pc": 12360, "instructions": ["instr3"] } ], "stats": { "before": { "main_columns": 100, "constraints": 200, "bus_interactions": 50 }, "after": { "main_columns": 50, "constraints": 100, "bus_interactions": 25 } }, "width_before": 100, "value": 5000, "cost_before": 1000.0, "cost_after": 500.0 }], "labels": { "2099200": ["memset"], "2099448": ["memcpy"] } } ``` All older formats are normalized to `original_blocks` on load: - **Versions 2 & 3**: `original_block: { start_pc, instructions }` → wrapped in a 1-element array - **Version 1** (no `version` field): `original_block` with `statements` → `original_blocks[0]` with `instructions` - **Version 0** (bare array): same as v1 without wrapper, no labels **Visualization model**: A block's identity is its `block_id` — a comma-separated list of hex PCs (e.g., `0x3000,0x3050`). `start_pc` is the first basic block's PC (used for sorting/display). Multiple blocks may share the same basic block PC. 
## Testing Start server: ```bash python3 -m http.server 8000 & ``` Test URL with real data (~11,300 APCs): ``` http://localhost:8000/?data=https%3A%2F%2Fgithub.com%2Fpowdr-labs%2Fbench-results%2Fblob%2Fgh-pages%2Fresults%2F2026-01-27-0453%2Freth%2Fapc_candidates.json ``` Verify: - Data loads (GitHub URLs auto-convert to raw) - Bar chart shows ~3.28x mean effectiveness - Value-cost plot reaches ~80% savings at 1000 APCs - Labels table expands with function names - Block selection syncs across all views Cache-bust: append `&_t=1` to URL. ## URL Parameters ``` ?data= # Data source (required to load data) &plot=value-cost # Show value-cost plot (omit for default bar chart) &block=0x2008f8 # Select block by PC address (hex) ``` Example - jump directly to value-cost plot with a block selected: ``` http://localhost:8000/?data=&plot=value-cost&block=0x200af0 ``` URL updates automatically as you interact with the app, enabling easy sharing of specific views. ## Development Notes **D3.js chart redraw**: Charts are fully recreated on metric switch. Ensure `.remove()` is called on exit selections to prevent memory leaks. **State persistence**: `selectedBlock` must survive metric changes. Check selection still exists in new processed data. **GitHub URL conversion**: `loadFromUrl()` has regex converting blob URLs to raw URLs. Brittle - test after GitHub URL format changes. **Grouping threshold**: Blocks <0.1% of total cells grouped as "Other". Hardcoded in `createChart()`. **Weighted mean**: `sum(effectiveness * traceCells) / sum(traceCells)` - weights by trace cells, not block count. 
### Common Errors - **CORS**: GitHub blob URLs must convert to raw URLs - **D3 selections**: Use enter/update/exit patterns; don't forget `.remove()` - **Event handlers**: Remove old handlers when recreating charts - **Test with full dataset**: ~11K items, not small test data ================================================ FILE: autoprecompile-analyzer/index.html ================================================ APC Effectiveness Analyzer

Drop JSON file here or click to upload

Upload APC candidates JSON file

Or paste URL:
Supports direct JSON URLs and GitHub file links
================================================ FILE: autoprecompiles/Cargo.toml ================================================ [package] name = "powdr-autoprecompiles" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [dependencies] powdr-expression.workspace = true powdr-number.workspace = true powdr-constraint-solver.workspace = true itertools.workspace = true log.workspace = true num-traits.workspace = true serde.workspace = true tracing.workspace = true tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] } serde_json.workspace = true rayon = "1.10.0" strum = { version = "0.27.0", features = ["derive"] } priority-queue = "2.7.0" metrics.workspace = true deepsize2 = "0.1.0" derive_more.workspace = true derivative.workspace = true [dev-dependencies] expect-test = "1.5.1" flate2 = "1.1.2" powdr-openvm-bus-interaction-handler.workspace = true test-log.workspace = true criterion = { version = "0.4", features = ["html_reports"] } [package.metadata.cargo-udeps.ignore] development = ["env_logger"] [lints] workspace = true [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 [[bench]] name = "optimizer_benchmark" harness = false ================================================ FILE: autoprecompiles/benches/optimizer_benchmark.rs ================================================ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use powdr_autoprecompiles::{ bus_map::BusMap, export::{ApcWithBusMap, SimpleInstruction}, optimizer::optimize, Apc, ColumnAllocator, DegreeBound, }; use powdr_number::BabyBearField; use powdr_openvm_bus_interaction_handler::{ bus_map::OpenVmBusType, memory_bus_interaction::OpenVmMemoryBusInteraction, OpenVmBusInteractionHandler, }; type TestApc = Apc, (), ()>; const DEFAULT_DEGREE_BOUND: DegreeBound = DegreeBound { identities: 3, bus_interactions: 2, }; /// Benching the `test_optimize` test fn 
optimize_keccak_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("optimize-keccak"); group.sample_size(10); let file = std::fs::File::open("tests/keccak_apc_pre_opt.json.gz").unwrap(); let reader = flate2::read::GzDecoder::new(file); let apc: ApcWithBusMap> = serde_json::from_reader(reader).unwrap(); group.bench_function("optimize", |b| { b.iter_batched( || { ( apc.apc.machine.clone(), ColumnAllocator::from_max_poly_id_of_machine(&apc.apc.machine), ) }, |(machine, column_allocator)| { optimize::<_, _, _, OpenVmMemoryBusInteraction<_, _>>( black_box(machine), OpenVmBusInteractionHandler::default(), DEFAULT_DEGREE_BOUND, &apc.bus_map, column_allocator, &mut Default::default(), ) .unwrap() }, criterion::BatchSize::SmallInput, ); }); group.finish(); } criterion_group!(benches, optimize_keccak_benchmark); criterion_main!(benches); ================================================ FILE: autoprecompiles/scripts/plot_effectiveness.py ================================================ #!/usr/bin/env python3 import json import pandas as pd import matplotlib.pyplot as plt import matplotlib.colors as mcolors import argparse def load_apc_data(json_path, effectiveness_type='cost'): """Load APC candidates and compute effectiveness.""" with open(json_path, 'r') as f: data = json.load(f)["apcs"] def get_before_after_cost(item, eff_type): if eff_type == 'cost': return (item['cost_before'], item['cost_after']) elif eff_type == 'main_columns': return (item['stats']['before']['main_columns'], item['stats']['after']['main_columns']) elif eff_type == 'constraints': return (item['stats']['before']['constraints'], item['stats']['after']['constraints']) elif eff_type == 'bus_interactions': return (item['stats']['before']['bus_interactions'], item['stats']['after']['bus_interactions']) else: raise ValueError(f"Unknown effectiveness type: {eff_type}") rows = [] for item in data: cost_before, cost_after = get_before_after_cost(item, effectiveness_type) rows.append({ 'start_pcs': 
[b['start_pc'] for b in item['original_blocks']], 'cost_before': cost_before * item['execution_frequency'], 'cost_after': cost_after * item['execution_frequency'], 'effectiveness': cost_before / cost_after, 'instructions': sum(len(b['instructions']) for b in item['original_blocks']), }) return pd.DataFrame(rows) def format_cell_count(count): """Format cell count with appropriate units.""" if count >= 1e9: return f"{count/1e9:.1f}B" elif count >= 1e6: return f"{count/1e6:.1f}M" elif count >= 1e3: return f"{count/1e3:.1f}K" else: return f"{count:.0f}" def plot_effectiveness(json_path, filename=None, effectiveness_type='cost'): """Generate bar plot of effectiveness data.""" df = load_apc_data(json_path, effectiveness_type) total_cost_before = df['cost_before'].sum() total_cost_after = df['cost_after'].sum() # Print top 10 basic blocks top10 = df.nlargest(10, 'cost_before')[['start_pcs', 'cost_before', 'effectiveness', 'instructions']] print(top10) top10['cost_before'] = top10['cost_before'].apply(format_cell_count) top10.columns = ['Start PCs', 'Cost before', 'Effectiveness', 'Instructions'] print(f"\nTop 10 Basic Blocks by {effectiveness_type}:") print(top10.to_string(index=False)) print() # Calculate weighted mean effectiveness, corresponding to the overall effectiveness # assuming that all basic blocks are accelerated. 
mean_effectiveness = (df['effectiveness'] * df['cost_after']).sum() / total_cost_after print(f"Mean effectiveness: {mean_effectiveness:.2f}") # Separate large and small APCs (< 0.1% threshold) threshold = total_cost_before * 0.001 df_large = df[df['cost_before'] >= threshold].copy() df_small = df[df['cost_before'] < threshold] # Sort large APCs by cost df_large = df_large.sort_values('cost_before', ascending=False) # Create 'Other' entry if there are small APCs if len(df_small) > 0: other_cost = df_small['cost_before'].sum() other_effectiveness = (df_small['effectiveness'] * df_small['cost_before']).sum() / other_cost other_row = pd.DataFrame([{ 'effectiveness': other_effectiveness, 'cost_before': other_cost, 'instructions': -1, # Special marker for Other 'is_other': True }]) df_plot = pd.concat([df_large.assign(is_other=False), other_row], ignore_index=True) else: df_plot = df_large.assign(is_other=False) # Create plot fig, ax = plt.subplots(figsize=(12, 6)) # Set up color mapping with log scale valid_instructions = df_plot[~df_plot['is_other']]['instructions'] if len(valid_instructions) > 0: norm = mcolors.LogNorm(vmin=valid_instructions.min(), vmax=valid_instructions.max()) cmap = plt.cm.RdYlGn # Red-Yellow-Green colormap # Plot bars x_pos = 0 for idx, row in df_plot.iterrows(): width = row['cost_before'] if row.get('is_other', False): color = 'lightgray' else: color = cmap(norm(row['instructions'])) ax.bar(x_pos + width/2, row['effectiveness'], width=width, color=color, edgecolor='black', linewidth=0.5, alpha=0.8) # Label 'Other' box if it's wide enough if row.get('is_other', False) and width > total_cost_before * 0.02: # Only label if > 2% of total width ax.text(x_pos + width/2, row['effectiveness']/2, f'Other\n({len(df_small)} APCs)', ha='center', va='center', fontsize=10, color='black', weight='bold') x_pos += width # Formatting ax.set_xlabel('Cumulative cost before (software version)', fontsize=12) ax.set_ylabel('Effectiveness', fontsize=12) 
ax.set_title(f"Effectiveness by Basic Block (reduction in {effectiveness_type})", fontsize=14) ax.grid(True, alpha=0.3, axis='y') ax.axhline(mean_effectiveness, color='red', linestyle='--', linewidth=2, alpha=0.7) # Format x-axis ax.set_xlim(0, total_cost_before) x_ticks = ax.get_xticks() ax.set_xticklabels([format_cell_count(x) for x in x_ticks]) # Add colorbar for instruction count if len(valid_instructions) > 0: sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm) sm.set_array([]) cbar = plt.colorbar(sm, ax=ax, pad=0.02) cbar.set_label('Instructions (log scale)', rotation=270, labelpad=20) # Add mean text ax.text(0.02, 0.97, f'Mean: {mean_effectiveness:.2f}', transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=dict(boxstyle='round,pad=0.5', facecolor='wheat', alpha=0.8)) plt.tight_layout() # Save or show if filename: plt.savefig(filename, dpi=300, bbox_inches='tight') else: plt.show() if __name__ == "__main__": parser = argparse.ArgumentParser(description="Plot effectiveness analysis from APC candidates JSON file.") parser.add_argument("json_path", help="Path to the APC candidates JSON file") parser.add_argument("-o", "--output", help="Optional file name to save the plot", default=None) parser.add_argument("-e", "--effectiveness", choices=['cost', 'main_columns', 'constraints', 'bus_interactions'], default='cost', help="Type of effectiveness calculation (default: cost_before/cost_after)") args = parser.parse_args() plot_effectiveness(args.json_path, args.output, args.effectiveness) ================================================ FILE: autoprecompiles/scripts/rank_apc_candidates.py ================================================ #!/usr/bin/env python3 """ Simple APC Candidates JSON Parser This script parses the apc_candidates.json file and extracts key information in a concise format. 
""" import json import sys import argparse from pathlib import Path from tabulate import tabulate def main(): """Parse APC candidates and show key information.""" parser = argparse.ArgumentParser(description="Parse APC candidates and show key information.") parser.add_argument("json_file", help="Path to the APC candidates JSON file") parser.add_argument("-o", "--output", help="Output file (default: stdout)", default=None) args = parser.parse_args() json_file = Path(args.json_file) output_file = args.output if not json_file.exists(): print(f"Error: File {json_file} not found!") sys.exit(1) try: with open(json_file, 'r') as f: data = json.load(f)["apcs"] except Exception as e: print(f"Error reading file: {e}") sys.exit(1) # Capture output to write to file output_lines = [] # Process and calculate densitys for each candidate candidates_with_densitys = [] for i, candidate in enumerate(data): start_pcs = [b["start_pc"] for b in candidate["original_blocks"]] freq = candidate["execution_frequency"] num_instructions = sum(len(b["instructions"]) for b in candidate["original_blocks"]) # Get optimization stats before_constraints = candidate["stats"]["before"]["constraints"] after_constraints = candidate["stats"]["after"]["constraints"] before_main_columns = candidate["stats"]["before"]["main_columns"] after_main_columns = candidate["stats"]["after"]["main_columns"] before_bus_interactions = candidate["stats"]["before"]["bus_interactions"] after_bus_interactions = candidate["stats"]["after"]["bus_interactions"] value = candidate["value"] cost_before = candidate["cost_before"] cost_after = candidate["cost_after"] # Calculate improvements as factors (before/after ratios) cost_improvement_factor = cost_before / cost_after constraint_improvement_factor = before_constraints / after_constraints main_columns_improvement_factor = before_main_columns / after_main_columns bus_interactions_improvement_factor = before_bus_interactions / after_bus_interactions # Calculate density used for 
ranking candidates density = value / cost_after candidates_with_densitys.append({ 'index': i + 1, 'start_pcs': start_pcs, 'freq': freq, 'num_instructions': num_instructions, 'before_constraints': before_constraints, 'after_constraints': after_constraints, 'before_main_columns': before_main_columns, 'after_main_columns': after_main_columns, 'before_bus_interactions': before_bus_interactions, 'after_bus_interactions': after_bus_interactions, 'cost_improvement_factor': cost_improvement_factor, 'constraint_improvement_factor': constraint_improvement_factor, 'main_columns_improvement_factor': main_columns_improvement_factor, 'bus_interactions_improvement_factor': bus_interactions_improvement_factor, 'value': value, 'cost_before': cost_before, 'cost_after': cost_after, 'density': density, }) # Sort by descending density candidates_with_densitys.sort(key=lambda x: x['density'], reverse=True) # Summary statistics (moved to top) output_lines.append("") output_lines.append("=" * 120) output_lines.append(f"SUMMARY STATISTICS OVER ALL APC CANDIDATES") output_lines.append("=" * 120) total_candidates = len(data) total_instructions = sum(len(b["instructions"]) for c in data for b in c["original_blocks"]) total_cost_before = sum(c["cost_before"] for c in data) total_cost_after = sum(c["cost_after"] for c in data) total_cost_improvement_factor = total_cost_before / total_cost_after total_before_constraints = sum(c["stats"]["before"]["constraints"] for c in data) total_after_constraints = sum(c["stats"]["after"]["constraints"] for c in data) total_constraint_improvement_factor = total_before_constraints / total_after_constraints total_before_main_columns = sum(c["stats"]["before"]["main_columns"] for c in data) total_after_main_columns = sum(c["stats"]["after"]["main_columns"] for c in data) main_columns_improvement_factor = total_before_main_columns / total_after_main_columns total_before_bus_interactions = sum(c["stats"]["before"]["bus_interactions"] for c in data) 
total_after_bus_interactions = sum(c["stats"]["after"]["bus_interactions"] for c in data) total_bus_interactions_improvement_factor = total_before_bus_interactions / total_after_bus_interactions output_lines.append(f"# of APC Candidates: {total_candidates}") output_lines.append(f"Sum of Instructions: {total_instructions}") output_lines.append(f"Average Instructions per APC Candidate: {total_instructions / total_candidates:.1f}") output_lines.append("") output_lines.append(f"Sum of Cost: {total_cost_before} → {total_cost_after} ({total_cost_improvement_factor:.2f}x reduction)") output_lines.append(f"Sum of Main Columns: {total_before_main_columns} → {total_after_main_columns} ({main_columns_improvement_factor:.2f}x reduction)") output_lines.append(f"Sum of Constraints: {total_before_constraints} → {total_after_constraints} ({total_constraint_improvement_factor:.2f}x reduction)") output_lines.append(f"Sum of Bus Interactions: {total_before_bus_interactions} → {total_after_bus_interactions} ({total_bus_interactions_improvement_factor:.2f}x reduction)") # Statement count distribution stmt_dist = {} for c in data: stmt_count = sum(len(b["instructions"]) for b in c["original_blocks"]) stmt_dist[stmt_count] = stmt_dist.get(stmt_count, 0) + 1 output_lines.append("") output_lines.append("# of Instructions Distribution:") stmt_table_data = [] for stmt_count in sorted(stmt_dist.keys()): count = stmt_dist[stmt_count] percentage = (count / total_candidates) * 100 stmt_table_data.append([stmt_count, count, f"{percentage:.1f}%"]) stmt_table_headers = ["Instructions", "# of Candidates", "Percentage"] stmt_table_output = tabulate(stmt_table_data, headers=stmt_table_headers, tablefmt="grid") output_lines.append(stmt_table_output) # Frequency distribution freq_dist = {} for c in data: freq = c["execution_frequency"] freq_dist[freq] = freq_dist.get(freq, 0) + 1 output_lines.append("") output_lines.append("Execution Frequency Distribution:") freq_table_data = [] for freq in 
sorted(freq_dist.keys()): count = freq_dist[freq] percentage = (count / total_candidates) * 100 freq_table_data.append([f"{freq}x", count, f"{percentage:.1f}%"]) freq_table_headers = ["Frequency", "# of Candidates", "Percentage"] freq_table_output = tabulate(freq_table_data, headers=freq_table_headers, tablefmt="grid") output_lines.append(freq_table_output) # Show sorted candidates by density using tabulate output_lines.append("") output_lines.append("=" * 120) output_lines.append("APC CANDIDATES RANKED BY DENSITY (VALUE / COST_AFTER)") output_lines.append("=" * 120) # Prepare table data for tabulate table_headers = [ "Rank", "Start PCs", "# of Instr", "Freq", "Value", "Cost Before -> After (Redux)", "Density", "Main Cols Before -> After (Redux)", "Constraints Before -> After (Redux)", "Bus Int Before -> After (Redux)" ] table_data = [] for i, candidate in enumerate(candidates_with_densitys): row = [ i + 1, str(candidate['start_pcs']), candidate['num_instructions'], f"{candidate['freq']}x", f"{candidate['value']:.0f}", f"{candidate['cost_before']:.0f} -> {candidate['cost_after']:.0f} ({candidate['cost_improvement_factor']:.1f}x)", f"{candidate['density']:.2f}", f"{candidate['before_main_columns']} -> {candidate['after_main_columns']} ({candidate['main_columns_improvement_factor']:.1f}x)", f"{candidate['before_constraints']} -> {candidate['after_constraints']} ({candidate['constraint_improvement_factor']:.1f}x)", f"{candidate['before_bus_interactions']} -> {candidate['after_bus_interactions']} ({candidate['bus_interactions_improvement_factor']:.1f}x)" ] table_data.append(row) # Generate table using tabulate table_output = tabulate(table_data, headers=table_headers, tablefmt="grid") output_lines.append(table_output) # Write output to file or stdout try: if output_file: with open(output_file, 'w') as f: for line in output_lines: f.write(line + '\n') print(f"Output written to: {output_file}") else: # Write to stdout for line in output_lines: print(line) except 
Exception as e: print(f"Error writing to output file: {e}") # Fallback to console output for line in output_lines: print(line) if __name__ == "__main__": main() ================================================ FILE: autoprecompiles/scripts/readme.md ================================================ ### Scripts Set up (from the project root): ```bash python3 -m venv .venv source .venv/bin/activate pip install -r autoprecompiles/scripts/requirements.txt ``` ================================================ FILE: autoprecompiles/scripts/requirements.txt ================================================ pandas matplotlib ================================================ FILE: autoprecompiles/src/adapter.rs ================================================ use powdr_constraint_solver::constraint_system::BusInteractionHandler; use std::collections::BTreeMap; use std::hash::Hash; use std::{fmt::Display, sync::Arc}; use powdr_number::FieldElement; use serde::{Deserialize, Serialize}; use crate::blocks::{detect_superblocks, ExecutionBlocks, SuperBlock}; use crate::empirical_constraints::EmpiricalConstraints; use crate::evaluation::EvaluationResult; use crate::execution::{ExecutionState, OptimisticConstraint, OptimisticConstraints}; use crate::execution_profile::ExecutionProfile; use crate::{ blocks::{BasicBlock, Instruction, Program}, constraint_optimizer::IsBusStateful, memory_optimizer::MemoryBusInteraction, range_constraint_optimizer::RangeConstraintHandler, Apc, InstructionHandler, PowdrConfig, VmConfig, }; #[derive(Serialize, Deserialize)] pub struct ApcWithStats { apc: Arc>, stats: S, evaluation_result: EvaluationResult, } impl ApcWithStats { pub fn new(apc: Arc>, stats: S, evaluation_result: EvaluationResult) -> Self { Self { apc, stats, evaluation_result, } } #[allow(clippy::type_complexity)] pub fn into_parts(self) -> (Arc>, S, EvaluationResult) { (self.apc, self.stats, self.evaluation_result) } pub fn apc(&self) -> &Apc { &self.apc } pub fn stats(&self) -> &S { 
&self.stats } pub fn evaluation_result(&self) -> EvaluationResult { self.evaluation_result } } pub trait PgoAdapter { type Adapter: Adapter; fn filter_blocks_and_create_apcs_with_pgo( &self, blocks: Vec>, config: &PowdrConfig, vm_config: AdapterVmConfig, labels: BTreeMap>, empirical_constraints: EmpiricalConstraints, ) -> Vec> { let blocks = if let Some(prof) = self.execution_profile() { detect_superblocks(config, &prof.pc_list, blocks) } else { let superblocks = blocks .into_iter() .map(SuperBlock::from) // filter invalid APC candidates .filter(|sb| sb.instructions().count() > 1) .collect(); ExecutionBlocks::new_without_pgo(superblocks) }; self.create_apcs_with_pgo(blocks, config, vm_config, labels, empirical_constraints) } fn create_apcs_with_pgo( &self, exec_blocks: AdapterExecutionBlocks, config: &PowdrConfig, vm_config: AdapterVmConfig, labels: BTreeMap>, empirical_constraints: EmpiricalConstraints, ) -> Vec>; fn execution_profile(&self) -> Option<&ExecutionProfile> { None } fn pc_execution_count(&self, pc: u64) -> Option { self.execution_profile() .and_then(|prof| prof.pc_count.get(&pc).cloned()) } } pub trait Adapter: Sized where Self::InstructionHandler: InstructionHandler, { type Field: Serialize + for<'de> Deserialize<'de> + Send + Sync + Clone; type PowdrField: FieldElement; type InstructionHandler: InstructionHandler + Sync; type BusInteractionHandler: BusInteractionHandler + Clone + IsBusStateful + RangeConstraintHandler + Sync; type Program: Program + Send; type Instruction: Instruction + Serialize + for<'de> Deserialize<'de> + Send + Sync; type MemoryBusInteraction: MemoryBusInteraction< Self::PowdrField, V, >; type CustomBusTypes: Clone + Display + Sync + Eq + PartialEq + Serialize + for<'de> Deserialize<'de>; type ApcStats: Send + Sync; type AirId: Eq + Hash + Send + Sync; type ExecutionState: ExecutionState; fn into_field(e: Self::PowdrField) -> Self::Field; fn from_field(e: Self::Field) -> Self::PowdrField; /// Given the autoprecompile and the 
original instructions, return the stats fn apc_stats( apc: Arc>, instruction_handler: &Self::InstructionHandler, ) -> Self::ApcStats; fn is_branching(instr: &Self::Instruction) -> bool; fn is_allowed(instr: &Self::Instruction) -> bool; } pub type AdapterApcWithStats = ApcWithStats< ::Field, ::Instruction, <::ExecutionState as ExecutionState>::RegisterAddress, <::ExecutionState as ExecutionState>::Value, ::ApcStats, >; pub type ApcStats = ::ApcStats; pub type AdapterApc = Apc< ::Field, ::Instruction, <::ExecutionState as ExecutionState>::RegisterAddress, <::ExecutionState as ExecutionState>::Value, >; pub type AdapterApcOverPowdrField = Apc< ::PowdrField, ::Instruction, <::ExecutionState as ExecutionState>::RegisterAddress, <::ExecutionState as ExecutionState>::Value, >; pub type AdapterVmConfig<'a, A> = VmConfig< 'a, ::InstructionHandler, ::BusInteractionHandler, ::CustomBusTypes, >; pub type AdapterExecutionState = ::ExecutionState; pub type AdapterOptimisticConstraints = OptimisticConstraints< <::ExecutionState as ExecutionState>::RegisterAddress, <::ExecutionState as ExecutionState>::Value, >; pub type AdapterOptimisticConstraint = OptimisticConstraint< <::ExecutionState as ExecutionState>::RegisterAddress, <::ExecutionState as ExecutionState>::Value, >; pub type AdapterBasicBlock = BasicBlock<::Instruction>; pub type AdapterSuperBlock = SuperBlock<::Instruction>; pub type AdapterExecutionBlocks = ExecutionBlocks<::Instruction>; ================================================ FILE: autoprecompiles/src/blocks/detection.rs ================================================ use std::collections::BTreeSet; use crate::{ adapter::Adapter, blocks::{BasicBlock, Program}, }; /// Collects basic blocks from a program pub fn collect_basic_blocks( program: &A::Program, jumpdest_set: &BTreeSet, ) -> Vec> { let mut blocks = Vec::new(); let mut curr_block = BasicBlock { start_pc: program.instruction_index_to_pc(0), instructions: Vec::new(), }; for (i, instr) in 
program.instructions().enumerate() { let is_target = jumpdest_set.contains(&program.instruction_index_to_pc(i)); let is_branching = A::is_branching(&instr); let is_allowed = A::is_allowed(&instr); // If this opcode cannot be in an apc, we make sure it's alone in a BB. if !is_allowed { // If not empty, push the current block. if !curr_block.instructions.is_empty() { blocks.push(curr_block); } // Push the instruction itself blocks.push(BasicBlock { start_pc: program.instruction_index_to_pc(i), instructions: vec![instr.clone()], }); // Skip the instruction and start a new block from the next instruction. curr_block = BasicBlock { start_pc: program.instruction_index_to_pc(i + 1), instructions: Vec::new(), }; } else { // If the instruction is a target, we need to close the previous block // as is if not empty and start a new block from this instruction. if is_target { if !curr_block.instructions.is_empty() { blocks.push(curr_block); } curr_block = BasicBlock { start_pc: program.instruction_index_to_pc(i), instructions: Vec::new(), }; } curr_block.instructions.push(instr.clone()); // If the instruction is a branch, we need to close this block // with this instruction and start a new block from the next one. 
if is_branching { blocks.push(curr_block); // guaranteed to be non-empty because an instruction was just pushed curr_block = BasicBlock { start_pc: program.instruction_index_to_pc(i + 1), instructions: Vec::new(), }; } } } if !curr_block.instructions.is_empty() { blocks.push(curr_block); } tracing::info!( "Got {} basic blocks from `collect_basic_blocks`", blocks.len() ); blocks } ================================================ FILE: autoprecompiles/src/blocks/mod.rs ================================================ use std::{ collections::{BTreeMap, HashMap}, fmt::Display, }; use itertools::Itertools; use rayon::iter::{ IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, }; use serde::{Deserialize, Serialize}; /// Tools to detect basic blocks in a program mod detection; pub use detection::collect_basic_blocks; use crate::PowdrConfig; #[derive(Debug, Serialize, Deserialize, Clone)] /// A sequence of instructions starting at a given PC. pub struct BasicBlock { /// The program counter of the first instruction in this block. pub start_pc: u64, pub instructions: Vec, } impl BasicBlock { /// Returns an iterator over the program counters of the instructions in this block. pub fn pcs(&self) -> impl Iterator + '_ { (0..self.instructions.len()).map(move |i| self.start_pc + (i as u64 * I::pc_step() as u64)) } /// Returns an iterator over the program counters of the instructions in this block. pub fn instructions(&self) -> impl Iterator + '_ { self.instructions .iter() .enumerate() .map(|(index, i)| (self.start_pc + (index as u64 * I::pc_step() as u64), i)) } } #[derive(Debug, Serialize, Deserialize, Clone)] /// A sequence of basic blocks that can be made into an autoprecompile. /// A single basic block is represented as a SuperBlock with one element. 
pub struct SuperBlock<I> {
    blocks: Vec<BasicBlock<I>>,
}

impl<I> From<BasicBlock<I>> for SuperBlock<I> {
    fn from(basic_block: BasicBlock<I>) -> Self {
        SuperBlock {
            blocks: vec![basic_block],
        }
    }
}

impl<I> From<Vec<BasicBlock<I>>> for SuperBlock<I> {
    fn from(blocks: Vec<BasicBlock<I>>) -> Self {
        // A superblock is never empty.
        assert!(!blocks.is_empty());
        SuperBlock { blocks }
    }
}

impl<I> SuperBlock<I> {
    /// Returns true if this superblock consists of exactly one basic block.
    pub fn is_basic_block(&self) -> bool {
        self.blocks.len() == 1
    }

    /// Returns the single basic block if this superblock is one, `None` otherwise.
    pub fn try_as_basic_block(&self) -> Option<&BasicBlock<I>> {
        if self.is_basic_block() {
            Some(&self.blocks[0])
        } else {
            None
        }
    }
}

impl<I> SuperBlock<I> {
    /// Sequence of basic block start PCs, uniquely identifies this superblock
    pub fn start_pcs(&self) -> Vec<u64> {
        self.blocks.iter().map(|b| b.start_pc).collect()
    }

    /// For each basic block in the superblock, returns the index of its first instruction
    /// (within the superblock's flat instruction list) together with the block's start PC.
    pub fn instruction_indexed_start_pcs(&self) -> Vec<(usize, u64)> {
        let mut idx = 0;
        self.blocks
            .iter()
            .map(|b| {
                let elem = (idx, b.start_pc);
                idx += b.instructions.len();
                elem
            })
            .collect()
    }

    /// Sequence of basic blocks composing this superblock
    pub fn blocks(&self) -> impl Iterator<Item = &BasicBlock<I>> {
        self.blocks.iter()
    }

    /// Apply fn to every instruction in this superblock, returning a new superblock with the transformed instructions.
    pub fn map_instructions<I2, F>(self, f: F) -> SuperBlock<I2>
    where
        F: Fn(I) -> I2 + Clone,
    {
        SuperBlock {
            blocks: self
                .blocks
                .into_iter()
                .map(|b| BasicBlock {
                    start_pc: b.start_pc,
                    instructions: b.instructions.into_iter().map(f.clone()).collect(),
                })
                .collect(),
        }
    }
}

impl<I: PcStep> SuperBlock<I> {
    /// Returns an iterator over the program counters of the instructions in this block.
    pub fn pcs(&self) -> impl Iterator<Item = u64> + '_ {
        self.blocks.iter().flat_map(BasicBlock::pcs)
    }

    /// Sequence of instructions across all basic blocks in this superblock
    pub fn instructions(&self) -> impl Iterator<Item = (u64, &I)> + '_ {
        self.blocks.iter().flat_map(BasicBlock::instructions)
    }

    /// Parallel iterator over instructions across all basic blocks in this superblock
    pub fn par_instructions(&self) -> impl IndexedParallelIterator<Item = (u64, &I)>
    where
        I: Sync,
    {
        // note: we need collect_vec() because parallel flat_map does not implement IndexedParallelIterator
        self.instructions().collect_vec().into_par_iter()
    }
}

impl<I: Display> Display for SuperBlock<I> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A single basic block prints exactly like a `BasicBlock`.
        if let Some(bb) = self.try_as_basic_block() {
            return bb.fmt(f);
        }
        writeln!(f, "SuperBlock(")?;
        let mut insn_idx = 0;
        for block in &self.blocks {
            writeln!(f, "  pc: {}, statements: [", block.start_pc)?;
            for instr in block.instructions.iter() {
                writeln!(f, "    instr {insn_idx:>3}: {instr}")?;
                insn_idx += 1;
            }
            write!(f, "  ],")?;
        }
        write!(f, ")")
    }
}

impl<I: Display> Display for BasicBlock<I> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "BasicBlock(start_pc: {}, statements: [", self.start_pc)?;
        for (i, instr) in self.instructions.iter().enumerate() {
            writeln!(f, "  instr {i:>3}: {instr}")?;
        }
        write!(f, "])")
    }
}

// NOTE(review): the generic parameter list of this trait was lost in extraction;
// `I: PcStep` is inferred from the use of `I::pc_step()` below — confirm upstream.
pub trait Program<I: PcStep> {
    /// Returns the base program counter.
    fn base_pc(&self) -> u64;

    /// Converts an instruction index to a program counter.
    fn instruction_index_to_pc(&self, idx: usize) -> u64 {
        self.base_pc() + (idx as u64 * I::pc_step() as u64)
    }

    /// Returns an iterator over the instructions in the program.
    fn instructions(&self) -> Box<dyn Iterator<Item = I> + '_>;

    /// Returns the number of instructions in the program.
    fn length(&self) -> u32;
}

/// The (fixed) PC increment between two consecutive instructions.
pub trait PcStep {
    fn pc_step() -> u32;
}

// NOTE(review): any generic parameters of this trait were lost in extraction —
// confirm against upstream.
pub trait Instruction: Clone + Display + PcStep {
    /// Returns a list of concrete values that the LHS of the PC lookup should be assigned to.
fn pc_lookup_row(&self, pc: u64) -> Vec; } /// A sequence of basic blocks seen in the execution, identified by their start PCs. /// A run is interrupted by an invalid APC block (i.e., single instruction). #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct ExecutionBasicBlockRun(pub Vec); /// A superblock present in the program, together with execution statistics (if PGO is enabled) pub struct BlockAndStats { pub block: SuperBlock, /// amount of times this block appears in the execution pub count: u32, } /// The result of superblock generation: a set of blocks with optional statistics for PGO. pub struct ExecutionBlocks { /// Superblocks seen in the execution. pub blocks: Vec>, /// Basic block runs in the execution (if PGO is enabled). /// Each run is paired with the number of times it was seen. pub execution_bb_runs: Vec<(ExecutionBasicBlockRun, u32)>, } impl ExecutionBlocks { pub fn new_without_pgo(blocks: Vec>) -> Self { Self { blocks: blocks .into_iter() .map(|block| BlockAndStats { block, count: 0 }) .collect(), execution_bb_runs: vec![], } } } /// Find the starting indices of non-overlapping occurrences of `needle` in `haystack`. /// (e.g. `aba` is found at indices [0, 4] in `abababa`). pub fn find_non_overlapping(haystack: &[T], needle: &[T]) -> Vec { let mut indices = vec![]; let mut pos = 0; while pos + needle.len() <= haystack.len() { if haystack[pos..pos + needle.len()] == needle[..] { indices.push(pos); pos += needle.len(); } else { pos += 1; } } indices } /// Find basic block runs in the execution. /// A run is interrupted upon hitting an invalid APC basic block (i.e., a single-instruction block). /// Returns a list of the runs, coupled with how many times each appears (a run may repeat in the execution). fn detect_execution_bb_runs( // start PC to basic blocks. 
// Should include every basic block in the program, including those with len=1 (invalid APC)
    start_pc_to_bb: &HashMap<u64, BasicBlock<I>>,
    execution: &[u64],
) -> Vec<(ExecutionBasicBlockRun, u32)> {
    // Basic block runs in the execution.
    // The same run can appear multiple times in the execution, so we keep a count using a map.
    // Each BB is identified by its starting PC.
    let mut execution_bb_runs = BTreeMap::new();
    let mut current_run = vec![];
    let mut pos = 0;
    while pos < execution.len() {
        let pc = execution[pos];
        let bb = start_pc_to_bb
            .get(&pc)
            .expect("PC in execution not part of any basic blocks");
        assert!(!bb.instructions.is_empty());
        if bb.instructions.len() == 1 {
            // if starting a single instruction BB (i.e., invalid for APC), end current run
            if !current_run.is_empty() {
                *execution_bb_runs
                    .entry(std::mem::take(&mut current_run))
                    .or_insert(0) += 1;
            }
        } else {
            // extend the run with this basic block
            current_run.push(pc);
        }
        // move to next bb: the whole block executes, so skip its instructions
        pos += bb.instructions.len();
    }
    // close the final run, if any
    if !current_run.is_empty() {
        *execution_bb_runs
            .entry(std::mem::take(&mut current_run))
            .or_insert(0) += 1;
    }
    execution_bb_runs
        .into_iter()
        .map(|(run, count)| (ExecutionBasicBlockRun(run), count))
        .collect()
}

/// Find all superblocks up to max_len in the basic block run and count their occurrences.
/// Returns a map from superblock to its count.
fn count_superblocks_in_run(
    bb_run: &ExecutionBasicBlockRun,
    max_len: usize,
) -> BTreeMap<Vec<u64>, u32> {
    let mut superblocks_in_run = BTreeMap::new();
    // first, we identify the superblocks in this run
    for len in 1..=std::cmp::min(max_len, bb_run.0.len()) {
        superblocks_in_run.extend(bb_run.0.windows(len).map(|w| (w.to_vec(), 0)));
    }
    // then we count their (non-overlapping) occurrences
    for (sblock, count) in superblocks_in_run.iter_mut() {
        *count = find_non_overlapping(&bb_run.0, sblock).len() as u32;
    }
    superblocks_in_run
}

/// Find all superblocks up to max_len in the execution and count their occurrences.
/// Returns a map from superblock to its count.
fn count_superblocks_in_execution( execution_bb_runs: &[(ExecutionBasicBlockRun, u32)], max_len: usize, ) -> BTreeMap, u32> { let sblocks = execution_bb_runs .par_iter() .map(|(run, run_count)| { count_superblocks_in_run(run, max_len) .into_iter() .map(|(sblock, sblock_occurrences_in_run)| { (sblock, sblock_occurrences_in_run * run_count) }) .collect() }) .reduce(BTreeMap::new, |mut sblocks_a, sblocks_b| { // merge counts of b into a for (sblock, count) in sblocks_b { *sblocks_a.entry(sblock).or_insert(0) += count; } sblocks_a }); sblocks } /// Detect basic blocks and superblocks present in the given execution. /// Returns the detected blocks, together with their execution information. /// Does not return invalid APC blocks (i.e., single instruction) and blocks that are never executed. pub fn detect_superblocks( cfg: &PowdrConfig, // program execution as a sequence of PCs execution_pc_list: &[u64], // all program basic blocks (including single instruction ones), in no particular order basic_blocks: Vec>, ) -> ExecutionBlocks { tracing::info!( "Detecting superblocks with <= {} basic blocks, over the sequence of {} PCs", cfg.superblock_max_bb_count, execution_pc_list.len() ); let start = std::time::Instant::now(); // index basic blocks by start PC let start_pc_to_bb: HashMap<_, _> = basic_blocks .into_iter() .map(|bb| (bb.start_pc, bb)) .collect(); let execution_bb_runs = detect_execution_bb_runs(&start_pc_to_bb, execution_pc_list); let blocks_found = count_superblocks_in_execution(&execution_bb_runs, cfg.superblock_max_bb_count as usize); tracing::info!( "Found {} blocks in {} basic block runs. 
Took {:?}", blocks_found.len(), execution_bb_runs.len(), start.elapsed(), ); // build the result let mut block_stats = vec![]; let mut skipped_exec_count = 0; let mut skipped_max_insn = 0; blocks_found.into_iter().for_each(|(sblock_pcs, count)| { let block = SuperBlock::from( sblock_pcs .iter() .map(|start_pc| start_pc_to_bb[start_pc].clone()) .collect_vec(), ); // skip superblocks that were executed less than the cutoff if count < cfg.apc_exec_count_cutoff { skipped_exec_count += 1; return; } // skip superblocks with too many instructions if block.instructions().count() > cfg.apc_max_instructions as usize { skipped_max_insn += 1; return; } block_stats.push(BlockAndStats { block, count }); }); tracing::info!( "Skipped blocks: {} to execution cutoff, {} to instruction count", skipped_exec_count, skipped_max_insn, ); tracing::info!( "Of the {} remaining blocks, {} are basic blocks and {} are superblocks", block_stats.len(), block_stats .iter() .filter(|b| b.block.is_basic_block()) .count(), block_stats .iter() .filter(|b| !b.block.is_basic_block()) .count(), ); ExecutionBlocks { blocks: block_stats, execution_bb_runs, } } #[cfg(test)] mod test { use std::collections::BTreeMap; use crate::{DegreeBound, PowdrConfig}; use super::*; #[derive(Clone)] struct TestInstruction; impl PcStep for TestInstruction { fn pc_step() -> u32 { 1 } } #[test] fn test_find_non_overlapping() { assert_eq!(find_non_overlapping(&[1, 2, 1, 2, 1], &[1, 2, 1]), vec![0]); assert_eq!(find_non_overlapping(&[1, 2, 3], &[1, 2, 3]), vec![0]); assert_eq!(find_non_overlapping(&[1, 2, 3], &[4]), vec![] as Vec); assert_eq!(find_non_overlapping(&[1, 1, 1], &[1]), vec![0, 1, 2]); } #[test] fn test_superblocks_in_run() { let run = ExecutionBasicBlockRun(vec![4, 1, 2, 3, 5, 1, 2, 3, 4]); let max_len = 3; let counts = count_superblocks_in_run(&run, max_len); assert_eq!( counts.len(), 5 + // size 1 6 + // size 2 6 // size 3 ); assert_eq!(counts[&vec![1]], 2); assert_eq!(counts[&vec![1, 2]], 2); 
assert_eq!(counts[&vec![4]], 2); assert_eq!(counts[&vec![5]], 1); assert_eq!(counts[&vec![4, 1, 2]], 1); assert_eq!(counts[&vec![1, 2, 3]], 2); assert_eq!(counts[&vec![2, 3, 4]], 1); } #[test] fn test_detect_superblocks_counts_and_execution_runs() { let bb = |start_pc: u64, len: usize| BasicBlock { start_pc, instructions: vec![TestInstruction; len], }; let cfg = PowdrConfig::new( 10, 0, DegreeBound { identities: 2, bus_interactions: 2, }, ) .with_superblocks(2, None, None); let basic_blocks = vec![bb(100, 2), bb(200, 2), bb(300, 1), bb(400, 3), bb(500, 2)]; let execution = vec![100, 101, 200, 201, 300, 400, 401, 402, 100, 101, 200, 201]; let result = detect_superblocks(&cfg, &execution, basic_blocks); assert_eq!( result.execution_bb_runs, vec![ (ExecutionBasicBlockRun(vec![100, 200]), 1), (ExecutionBasicBlockRun(vec![400, 100, 200]), 1), ] ); let counts = result .blocks .into_iter() .map(|entry| (entry.block.start_pcs(), entry.count)) .collect::>(); assert_eq!(counts.get(&vec![100]), Some(&2)); assert_eq!(counts.get(&vec![200]), Some(&2)); assert_eq!(counts.get(&vec![400]), Some(&1)); assert_eq!(counts.get(&vec![100, 200]), Some(&2)); assert_eq!(counts.get(&vec![400, 100]), Some(&1)); assert!(!counts.contains_key(&vec![300])); assert!(!counts.contains_key(&vec![500])); } } ================================================ FILE: autoprecompiles/src/bus_map.rs ================================================ use serde::{Deserialize, Serialize}; use std::{collections::BTreeMap, fmt::Display}; #[derive(Copy, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] pub enum BusType { /// In a no-CPU architecture, instruction AIRs receive the current state and send the next state. /// Typically the state would include the current time stamp and program counter, but powdr does /// not make any assumptions about the state. ExecutionBridge, /// Memory bus for reading and writing memory. Memory, /// A lookup to fetch the instruction arguments for a given PC. 
PcLookup, /// Other types, specific to the VM integration. Powdr largely ignores these. Other(C), } impl std::fmt::Display for BusType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let name = match self { BusType::ExecutionBridge => "EXECUTION_BRIDGE", BusType::Memory => "MEMORY", BusType::PcLookup => "PC_LOOKUP", BusType::Other(other_type) => &other_type.to_string(), }; write!(f, "{name}") } } #[derive(Clone, Deserialize, Serialize)] pub struct BusMap { bus_ids: BTreeMap>, } impl BusMap { /// Construct a new `BusMap`, ensuring the same id is not used for different `BusType`s pub fn from_id_type_pairs(pairs: impl IntoIterator)>) -> Self { let mut bus_ids = BTreeMap::new(); for (k, v) in pairs.into_iter() { bus_ids.entry(k).and_modify(|existing| { if existing != &v { panic!("BusType `{v}` already exists under ID `{existing}`, cannot map to `{v}`"); } }).or_insert(v); } BusMap { bus_ids } } /// Lookup the `BusType` for a given ID. pub fn bus_type(&self, bus_id: u64) -> BusType { self.bus_ids.get(&bus_id).cloned().unwrap_or_else(|| { panic!("No bus type found for ID: {bus_id}"); }) } /// View the entire map. pub fn all_types_by_id(&self) -> &BTreeMap> { &self.bus_ids } /// Find the ID for a given `BusType` (if any). 
pub fn get_bus_id(&self, bus_type: &BusType) -> Option { self.bus_ids .iter() .find_map(|(id, bus)| if bus == bus_type { Some(*id) } else { None }) } } ================================================ FILE: autoprecompiles/src/constraint_optimizer.rs ================================================ use std::{ collections::{HashMap, HashSet}, fmt::Display, hash::Hash, iter::once, }; use itertools::Itertools; use num_traits::Zero; use powdr_constraint_solver::{ constraint_system::{ AlgebraicConstraint, BusInteractionHandler, ConstraintRef, ConstraintSystem, }, grouped_expression::GroupedExpression, indexed_constraint_system::IndexedConstraintSystem, inliner::DegreeBound, reachability::reachable_variables, rule_based_optimizer::rule_based_optimization, solver::Solver, }; use powdr_number::FieldElement; use serde::Serialize; use crate::{ export::ExportOptions, low_degree_bus_interaction_optimizer::LowDegreeBusInteractionOptimizer, memory_optimizer::{optimize_memory, MemoryBusInteraction}, range_constraint_optimizer::RangeConstraintHandler, stats_logger::StatsLogger, }; #[derive(Debug)] pub enum Error { ConstraintSolverError(powdr_constraint_solver::solver::Error), } impl From for Error { fn from(err: powdr_constraint_solver::solver::Error) -> Self { Error::ConstraintSolverError(err) } } /// Simplifies the constraints as much as possible. /// This function is similar to powdr_pilopt::qse_opt::run_qse_optimization, except it: /// - Runs on the entire constraint system, including bus interactions. /// - Panics if the solver fails. /// - Removes trivial constraints (e.g. `0 = 0` or bus interaction with multiplicity `0`) /// from the constraint system. #[allow(clippy::too_many_arguments)] pub fn optimize_constraints< P: FieldElement, V: Ord + Clone + Eq + Hash + Display + Serialize, M: MemoryBusInteraction, >( constraint_system: IndexedConstraintSystem, solver: &mut impl Solver, bus_interaction_handler: impl BusInteractionHandler

+ IsBusStateful

+ RangeConstraintHandler

+ Clone, stats_logger: &mut StatsLogger, memory_bus_id: Option, degree_bound: DegreeBound, new_var: &mut impl FnMut(&str) -> V, export_options: &mut ExportOptions, ) -> Result, Error> { let constraint_system = solver_based_optimization(constraint_system, solver, export_options)?; stats_logger.log("solver-based optimization", &constraint_system); export_options.export_optimizer_inner_constraint_system(constraint_system.system(), "solver"); let constraint_system = remove_trivial_constraints(constraint_system); stats_logger.log("removing trivial constraints", &constraint_system); export_options .export_optimizer_inner_constraint_system(constraint_system.system(), "remove_trivial"); let constraint_system = remove_free_variables(constraint_system, solver, bus_interaction_handler.clone()); stats_logger.log("removing free variables", &constraint_system); export_options .export_optimizer_inner_constraint_system(constraint_system.system(), "remove_free"); let constraint_system = remove_disconnected_columns(constraint_system, solver, bus_interaction_handler.clone()); stats_logger.log("removing disconnected columns", &constraint_system); export_options.export_optimizer_inner_constraint_system( constraint_system.system(), "remove_disconnected", ); let constraint_system = trivial_simplifications( constraint_system, bus_interaction_handler.clone(), stats_logger, ); export_options .export_optimizer_inner_constraint_system(constraint_system.system(), "trivial_simp"); let (constraint_system, assignments) = rule_based_optimization( constraint_system, &*solver, bus_interaction_handler.clone(), new_var, // No degree bound given, i.e. only perform replacements that // do not increase the degree. 
None, ); solver.add_algebraic_constraints(assignments.iter().map(|(v, val)| { AlgebraicConstraint::assert_eq( GroupedExpression::from_unknown_variable(v.clone()), val.clone(), ) })); stats_logger.log("rule-based optimization", &constraint_system); export_options.register_substituted_variables(assignments); export_options .export_optimizer_inner_constraint_system(constraint_system.system(), "rule_based"); // At this point, we throw away the index and only keep the constraint system, since the rest of the optimisations are defined on the system alone let constraint_system: ConstraintSystem = constraint_system.into(); let constraint_system = substitute_bus_interaction_fields(solver, constraint_system); stats_logger.log( "substituting fields in bus interactions", &constraint_system, ); export_options.export_optimizer_inner_constraint_system( &constraint_system, "substitute_bus_interactio_fields", ); let constraint_system = optimize_memory::<_, _, M>(constraint_system, solver, memory_bus_id); stats_logger.log("memory optimization", &constraint_system); export_options.export_optimizer_inner_constraint_system(&constraint_system, "memory"); let constraint_system = LowDegreeBusInteractionOptimizer::new( solver, bus_interaction_handler.clone(), degree_bound, ) .optimize(constraint_system); stats_logger.log( "low degree bus interaction optimization", &constraint_system, ); export_options.export_optimizer_inner_constraint_system(&constraint_system, "low_degree_bus"); Ok(constraint_system) } /// Tries to replace each bus interaction field by a constant, if that expression /// is known to be constant to the solver. /// For each such field, also adds an algebraic constraint asserting that the field /// expression is equal to the constant, because this is needed for soundness in some /// situations. /// For simple situations, this constraint will be optimizer away in subsequent stages. 
fn substitute_bus_interaction_fields( solver: &mut impl Solver, mut constraint_system: ConstraintSystem, ) -> ConstraintSystem { for field in constraint_system .bus_interactions .iter_mut() .flat_map(|bi| bi.fields_mut()) { // If we have an expression of the form `a * x + b` that is known to be constant, // then we would already know the value of `x`. if field.is_affine() && field.linear_components().len() <= 1 { continue; } if let Some(v) = solver.try_to_equivalent_constant(field) { let constr = AlgebraicConstraint::assert_eq(field.clone(), GroupedExpression::from_number(v)); *field = GroupedExpression::from_number(v); constraint_system.algebraic_constraints.push(constr); } } constraint_system } /// Performs some very easy simplifications that only remove constraints. pub fn trivial_simplifications( constraint_system: IndexedConstraintSystem, bus_interaction_handler: impl BusInteractionHandler

+ IsBusStateful

+ RangeConstraintHandler

+ Clone, stats_logger: &mut StatsLogger, ) -> IndexedConstraintSystem { let constraint_system = remove_trivial_constraints(constraint_system); stats_logger.log("removing trivial constraints", &constraint_system); let constraint_system = remove_equal_bus_interactions(constraint_system, bus_interaction_handler.clone()); stats_logger.log("removing equal bus interactions", &constraint_system); let constraint_system = remove_redundant_constraints(constraint_system); stats_logger.log("removing redundant constraints", &constraint_system); let constraint_system = remove_unreferenced_derived_variables(constraint_system); stats_logger.log( "removing unreferenced derived variables", &constraint_system, ); constraint_system } fn solver_based_optimization( mut constraint_system: IndexedConstraintSystem, solver: &mut impl Solver, export_options: &mut ExportOptions, ) -> Result, Error> { let assignments = solver.solve()?; log::trace!("Solver figured out the following assignments:"); if log::log_enabled!(log::Level::Trace) { for (var, value) in assignments.iter() { log::trace!(" {var} = {value}"); } } // Assert that all substitutions are affine so that the degree // does not increase. assert!(assignments.iter().all(|(_, expr)| expr.is_affine())); export_options.register_substituted_variables( assignments .iter() .map(|(v, expr)| (v.clone(), expr.clone())), ); constraint_system.apply_substitutions(assignments); // Now try to replace bus interaction fields that the solver knows to be constant let mut bus_interactions = vec![]; let mut new_algebraic_constraints = vec![]; // We remove all bus interactions because we do not want to change the order. 
constraint_system.retain_bus_interactions(|bus_interaction| { let mut modified = false; let replacement = bus_interaction .fields() .map(|field| { if let Some(n) = try_replace_by_number(field, solver) { modified = true; new_algebraic_constraints .push(AlgebraicConstraint::assert_eq(n.clone(), field.clone())); n } else { field.clone() } }) .collect(); if modified { log::trace!("Replacing bus interaction {bus_interaction} with {replacement}"); } bus_interactions.push(replacement); false }); constraint_system.add_bus_interactions(bus_interactions); constraint_system.add_algebraic_constraints(new_algebraic_constraints); Ok(constraint_system) } /// Tries to find a number that is equivalent to the expression and returns it /// as a GroupedExpression. /// Returns None if it was unsuccessful or if the expression already is a number. fn try_replace_by_number( expr: &GroupedExpression, solver: &impl Solver, ) -> Option> { if expr.try_to_number().is_some() { return None; } Some(GroupedExpression::from_number( solver .range_constraint_for_expression(expr) .try_to_single_value()?, )) } /// Removes free variables from the constraint system, under some conditions. /// /// Motivation: Suppose there is a constraint `2 * foo = bar` and `foo` only appears in this constraint. /// Then, if we assume that all constraints are satisfiable, the prover would be able to satisfy it for /// any value of `bar` by solving for `foo`. Therefore, the constraint can be removed. /// The same would be true for a *stateless* bus interaction, e.g. `[foo * bar] in [BYTES]`. /// /// This function removes *some* constraints like this (see TODOs below). 
// NOTE(review): generic parameter lists in the signatures below look stripped
// by text extraction (`IndexedConstraintSystem`, `impl Solver`,
// `impl IsBusStateful + Clone` with no type arguments); confirm against the
// original source before building.
fn remove_free_variables(
    mut constraint_system: IndexedConstraintSystem,
    solver: &mut impl Solver,
    bus_interaction_handler: impl IsBusStateful + Clone,
) -> IndexedConstraintSystem {
    // Snapshot of every unknown variable currently referenced anywhere.
    let all_variables = constraint_system
        .system()
        .referenced_unknown_variables()
        .cloned()
        .collect::>();
    let variables_to_delete = all_variables
        .iter()
        // Find variables that are referenced in exactly one constraint
        .filter_map(|variable| {
            constraint_system
                .constraints_referencing_variables(once(variable))
                .exactly_one()
                .ok()
                .map(|constraint| (variable.clone(), constraint))
        })
        // Keep only those (variable, constraint) pairs where the prover can
        // always satisfy the single constraint by choosing the variable.
        .filter(|(variable, constraint)| match constraint {
            // Remove the algebraic constraint if we can solve for the variable.
            ConstraintRef::AlgebraicConstraint(constr) => {
                can_always_be_satisfied_via_free_variable(*constr, variable)
            }
            ConstraintRef::BusInteraction(bus_interaction) => {
                let bus_id = bus_interaction.bus_id.try_to_number().unwrap();
                // Only stateless bus interactions can be removed.
                let is_stateless = !bus_interaction_handler.is_stateful(bus_id);
                // TODO: This is overly strict.
                // We assume that the bus interaction is satisfiable. Given that it is, there
                // will be at least one assignment of the payload fields that satisfies it.
                // If the prover has the freedom to choose each payload field, it can always find
                // a satisfying assignment.
                // This could be generalized to multiple unknown fields, but it would be more complicated,
                // because *each* field would need a *different* free variable.
                let has_one_unknown_field = bus_interaction
                    .payload
                    .iter()
                    .filter(|field| field.try_to_number().is_none())
                    .count()
                    == 1;
                // If the expression is linear in the free variable, the prover would be able to solve for it
                // to satisfy the constraint. Otherwise, this is not necessarily the case.
                // Note that if the above check is true, there will only be one field of degree > 0.
                let all_degrees_at_most_one = bus_interaction
                    .payload
                    .iter()
                    .all(|field| field.degree_of_variable(variable) <= 1);
                is_stateless && has_one_unknown_field && all_degrees_at_most_one
            }
        })
        .map(|(variable, _constraint)| variable.clone())
        .collect::>();
    // Everything not scheduled for deletion survives.
    let variables_to_keep = all_variables
        .difference(&variables_to_delete)
        .cloned()
        .collect::>();
    // Drop deleted variables from the solver state as well.
    solver.retain_variables(&variables_to_keep);
    // An algebraic constraint is kept only if *all* of its variables survive;
    // referencing a deleted variable means it was the (single) constraint that
    // justified the deletion.
    constraint_system.retain_algebraic_constraints(|constraint| {
        constraint
            .referenced_unknown_variables()
            .all(|var| variables_to_keep.contains(var))
    });
    // Stateful bus interactions are always kept; stateless ones only if all
    // their variables survive.
    constraint_system.retain_bus_interactions(|bus_interaction| {
        let bus_id = bus_interaction.bus_id.try_to_number().unwrap();
        bus_interaction_handler.is_stateful(bus_id)
            || bus_interaction
                .referenced_unknown_variables()
                .all(|var| variables_to_keep.contains(var))
    });
    constraint_system
}

/// Returns true if the given constraint can always be made to be satisfied by setting the
/// free variable, regardless of the values of other variables.
fn can_always_be_satisfied_via_free_variable<
    T: FieldElement,
    V: Clone + Hash + Eq + Ord + Display,
>(
    constraint: AlgebraicConstraint<&GroupedExpression>,
    free_variable: &V,
) -> bool {
    // Directly solvable for the free variable: trivially satisfiable.
    if constraint.try_solve_for(free_variable).is_some() {
        true
    } else if let Some((left, right)) = constraint.expression.try_as_single_product() {
        // If either `left` or `right` can be set to 0, the constraint is satisfied.
        can_always_be_satisfied_via_free_variable(AlgebraicConstraint::from(left), free_variable)
            || can_always_be_satisfied_via_free_variable(
                AlgebraicConstraint::from(right),
                free_variable,
            )
    } else {
        false
    }
}

/// Removes any columns that are not connected to *stateful* bus interactions (e.g. memory),
/// because those are the only way to interact with the rest of the zkVM (e.g. other
/// instructions).
/// We assume that the input constraint system is satisfiable.
/// Because the removed constraints
/// are not connected to the rest of the system, the prover can always satisfy them, so removing
/// them is safe.
/// Note that if there were unsatisfiable constraints, they might also be removed, which would
/// change the statement being proven.
pub fn remove_disconnected_columns(
    mut constraint_system: IndexedConstraintSystem,
    solver: &mut impl Solver,
    bus_interaction_handler: impl IsBusStateful + Clone,
) -> IndexedConstraintSystem {
    // Seed the reachability search with every variable that occurs in a
    // stateful bus interaction.
    let initial_variables = variables_in_stateful_bus_interactions(
        constraint_system.system(),
        bus_interaction_handler.clone(),
    )
    .cloned();
    // All variables transitively connected to the seed set via shared constraints.
    let variables_to_keep = reachable_variables(initial_variables, &constraint_system);
    solver.retain_variables(&variables_to_keep);
    // A constraint touching *any* kept variable stays; fully disconnected
    // constraints are dropped.
    constraint_system.retain_algebraic_constraints(|constraint| {
        constraint
            .referenced_unknown_variables()
            .any(|var| variables_to_keep.contains(var))
    });
    constraint_system.retain_bus_interactions(|bus_interaction| {
        let bus_id = bus_interaction.bus_id.try_to_number().unwrap();
        let has_vars_to_keep = bus_interaction
            .referenced_unknown_variables()
            .any(|var| variables_to_keep.contains(var));
        // has_vars_to_keep would also be false for bus interactions containing only
        // constants, so we also check again whether it is stateful.
        bus_interaction_handler.is_stateful(bus_id) || has_vars_to_keep
    });
    constraint_system
}

/// Returns an iterator over all variables that are referenced in stateful bus interactions.
fn variables_in_stateful_bus_interactions<'a, P: FieldElement, V: Ord + Clone + Eq + Hash>(
    constraint_system: &'a ConstraintSystem,
    bus_interaction_handler: impl IsBusStateful

+ 'a,
) -> impl Iterator {
    constraint_system
        .bus_interactions
        .iter()
        // Keep only interactions whose bus the handler reports as stateful.
        .filter(move |bus_interaction| {
            let bus_id = bus_interaction.bus_id.try_to_number().unwrap();
            bus_interaction_handler.is_stateful(bus_id)
        })
        // Flatten to the variables those interactions reference.
        .flat_map(|bus_interaction| bus_interaction.referenced_unknown_variables())
}

/// Drops constraints that can have no effect: algebraic constraints flagged by
/// `is_redundant`, and bus interactions whose multiplicity is the constant zero.
fn remove_trivial_constraints(
    mut constraint_system: IndexedConstraintSystem,
) -> IndexedConstraintSystem {
    constraint_system.retain_algebraic_constraints(|constraint| !constraint.is_redundant());
    constraint_system
        .retain_bus_interactions(|bus_interaction| !bus_interaction.multiplicity.is_zero());
    constraint_system
}

fn remove_equal_bus_interactions(
    mut constraint_system: IndexedConstraintSystem,
    bus_interaction_handler: impl IsBusStateful

> for Stats { fn from(machine: &SymbolicMachine

) -> Self { Stats { num_constraints: machine.constraints.len(), num_bus_interactions: machine.bus_interactions.len(), num_witness_columns: machine.unique_references().count(), } } } impl From<&ConstraintSystem> for Stats { fn from(constraint_system: &ConstraintSystem) -> Self { Stats { num_constraints: constraint_system.algebraic_constraints.len(), num_bus_interactions: constraint_system.bus_interactions.len(), num_witness_columns: constraint_system .referenced_unknown_variables() .unique() .count(), } } } impl From<&IndexedConstraintSystem> for Stats { fn from(constraint_system: &IndexedConstraintSystem) -> Self { Stats::from(constraint_system.system()) } } ================================================ FILE: autoprecompiles/src/symbolic_machine.rs ================================================ use crate::bus_map::BusMap; use crate::expression::{AlgebraicExpression, AlgebraicReference}; use crate::expression_conversion::{ algebraic_to_grouped_expression, grouped_expression_to_algebraic, }; use crate::powdr::UniqueReferences; use itertools::Itertools; use powdr_constraint_solver::constraint_system::{ self, AlgebraicConstraint, BusInteraction, ConstraintSystem, DerivedVariable, }; use powdr_constraint_solver::grouped_expression::GroupedExpression; use powdr_expression::AlgebraicUnaryOperator; use powdr_expression::{visitors::Children, AlgebraicUnaryOperation}; use serde::{Deserialize, Serialize}; use std::fmt::Display; use std::iter::once; use powdr_number::FieldElement; #[derive(Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)] pub struct SymbolicInstructionStatement { pub opcode: T, pub args: Vec, } impl IntoIterator for SymbolicInstructionStatement { type IntoIter = std::iter::Chain, std::vec::IntoIter>; type Item = T; fn into_iter(self) -> Self::IntoIter { once(self.opcode).chain(self.args) } } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(transparent)] pub struct SymbolicConstraint { pub expr: AlgebraicExpression, } impl Display for 
SymbolicConstraint { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.expr) } } impl From> for SymbolicConstraint { fn from(expr: AlgebraicExpression) -> Self { let expr = match expr { AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { op: AlgebraicUnaryOperator::Minus, expr, }) => *expr, // Remove the negation at the outside. other => other, }; Self { expr } } } impl Children> for SymbolicConstraint { fn children(&self) -> Box> + '_> { Box::new(once(&self.expr)) } fn children_mut(&mut self) -> Box> + '_> { Box::new(once(&mut self.expr)) } } #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] pub struct SymbolicBusInteraction { pub id: u64, pub mult: AlgebraicExpression, pub args: Vec>, } impl Display for SymbolicBusInteraction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "(id={}, mult={}, args=[{}])", self.id, self.mult, self.args.iter().join(", ") ) } } impl SymbolicBusInteraction { pub fn try_multiplicity_to_number(&self) -> Option { match self.mult { AlgebraicExpression::Number(n) => Some(n), _ => None, } } } impl Children> for SymbolicBusInteraction { fn children(&self) -> Box> + '_> { Box::new(once(&self.mult).chain(&self.args)) } fn children_mut(&mut self) -> Box> + '_> { Box::new(once(&mut self.mult).chain(&mut self.args)) } } #[derive(Debug, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)] pub enum BusInteractionKind { Send, Receive, } /// A machine comprised of algebraic constraints, bus interactions and potentially derived columns. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SymbolicMachine { /// Constraints whose expressions have to evaluate to zero for an assignment to be satisfying. pub constraints: Vec>, /// Bus interactions that model communication with other machines / chips or static lookups. 
pub bus_interactions: Vec>, /// Columns that have been newly created during the optimization process with a method /// to compute their values from other columns. pub derived_columns: Vec>>, } type ComputationMethod = powdr_constraint_solver::constraint_system::ComputationMethod>; impl SymbolicMachine { pub fn main_columns(&self) -> impl Iterator + use<'_, T> { self.unique_references() } pub fn concatenate(mut self, other: SymbolicMachine) -> Self { self.constraints.extend(other.constraints); self.bus_interactions.extend(other.bus_interactions); self.derived_columns.extend(other.derived_columns); self } } impl Display for SymbolicMachine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { for bus_interaction in &self.bus_interactions { writeln!(f, "{bus_interaction}")?; } for constraint in &self.constraints { writeln!(f, "{constraint} = 0")?; } Ok(()) } } impl SymbolicMachine { pub fn render(&self, bus_map: &BusMap) -> String { let main_columns = self.main_columns().sorted().collect_vec(); let mut output = format!( "Symbolic machine using {} unique main columns:\n {}\n", main_columns.len(), main_columns.iter().join("\n ") ); let bus_interactions_by_bus = self .bus_interactions .iter() .map(|bus_interaction| (bus_interaction.id, bus_interaction)) .into_group_map() .into_iter() // sorted_by_key is stable, so we'll keep the order within each bus .sorted_by_key(|(bus_id, _)| *bus_id) .collect::>(); for (bus_id, bus_interactions) in &bus_interactions_by_bus { let bus_type = bus_map.bus_type(*bus_id); output.push_str(&format!("\n// Bus {bus_id} ({bus_type}):\n",)); for bus_interaction in bus_interactions { output.push_str(&format!( "mult={}, args=[{}]\n", bus_interaction.mult, bus_interaction.args.iter().join(", ") )); } } if !self.constraints.is_empty() { output.push_str("\n// Algebraic constraints:\n"); } for constraint in &self.constraints { output.push_str(&format!("{constraint} = 0\n")); } output.trim().to_string() } } impl SymbolicMachine { pub 
fn degree(&self) -> usize { self.children().map(|e| e.degree()).max().unwrap_or(0) } } impl Children> for SymbolicMachine { fn children(&self) -> Box> + '_> { Box::new( self.constraints .iter() .flat_map(|c| c.children()) .chain(self.bus_interactions.iter().flat_map(|i| i.children())), ) } fn children_mut(&mut self) -> Box> + '_> { Box::new( self.constraints .iter_mut() .flat_map(|c| c.children_mut()) .chain( self.bus_interactions .iter_mut() .flat_map(|i| i.children_mut()), ), ) } } pub fn symbolic_machine_to_constraint_system( symbolic_machine: SymbolicMachine

, ) -> ConstraintSystem { ConstraintSystem { algebraic_constraints: symbolic_machine .constraints .iter() .map(|constraint| { AlgebraicConstraint::assert_zero(algebraic_to_grouped_expression(&constraint.expr)) }) .collect(), bus_interactions: symbolic_machine .bus_interactions .iter() .map(symbolic_bus_interaction_to_bus_interaction) .collect(), derived_variables: symbolic_machine .derived_columns .iter() .map(|derived_variable| { let method = match &derived_variable.computation_method { ComputationMethod::Constant(c) => { constraint_system::ComputationMethod::Constant(*c) } ComputationMethod::QuotientOrZero(e1, e2) => { constraint_system::ComputationMethod::QuotientOrZero( algebraic_to_grouped_expression(e1), algebraic_to_grouped_expression(e2), ) } }; DerivedVariable::new(derived_variable.variable.clone(), method) }) .collect(), } } pub fn constraint_system_to_symbolic_machine( constraint_system: ConstraintSystem, ) -> SymbolicMachine

{ SymbolicMachine { constraints: constraint_system .algebraic_constraints .into_iter() .map(|constraint| grouped_expression_to_algebraic(constraint.expression).into()) .collect(), bus_interactions: constraint_system .bus_interactions .into_iter() .map(bus_interaction_to_symbolic_bus_interaction) .collect(), derived_columns: constraint_system .derived_variables .into_iter() .map(|derived_var| { let method = match derived_var.computation_method { constraint_system::ComputationMethod::Constant(c) => { constraint_system::ComputationMethod::Constant(c) } constraint_system::ComputationMethod::QuotientOrZero(e1, e2) => { constraint_system::ComputationMethod::QuotientOrZero( grouped_expression_to_algebraic(e1), grouped_expression_to_algebraic(e2), ) } }; DerivedVariable::new(derived_var.variable, method) }) .collect(), } } pub fn symbolic_bus_interaction_to_bus_interaction( bus_interaction: &SymbolicBusInteraction

, ) -> BusInteraction> { BusInteraction { bus_id: GroupedExpression::from_number(P::from(bus_interaction.id)), payload: bus_interaction .args .iter() .map(|arg| algebraic_to_grouped_expression(arg)) .collect(), multiplicity: algebraic_to_grouped_expression(&bus_interaction.mult), } } fn bus_interaction_to_symbolic_bus_interaction( bus_interaction: BusInteraction>, ) -> SymbolicBusInteraction

{ // We set the bus_id to a constant in `bus_interaction_to_symbolic_bus_interaction`, // so this should always succeed. let id = bus_interaction .bus_id .try_to_number() .unwrap() .to_arbitrary_integer() .try_into() .unwrap(); SymbolicBusInteraction { id, args: bus_interaction .payload .into_iter() .map(|arg| grouped_expression_to_algebraic(arg)) .collect(), mult: grouped_expression_to_algebraic(bus_interaction.multiplicity), } } ================================================ FILE: autoprecompiles/src/symbolic_machine_generator.rs ================================================ use itertools::Itertools; use powdr_constraint_solver::constraint_system::{ComputationMethod, DerivedVariable}; use powdr_expression::AlgebraicBinaryOperation; use powdr_number::FieldElement; use crate::{ adapter::Adapter, blocks::{Instruction, SuperBlock}, expression::AlgebraicExpression, powdr, symbolic_machine::{SymbolicBusInteraction, SymbolicConstraint, SymbolicMachine}, Apc, BusMap, BusType, ColumnAllocator, InstructionHandler, }; /// Converts the field type of a symbolic machine. pub fn convert_apc_field_type( apc: Apc, convert_field_element: &impl Fn(T) -> U, ) -> Apc { Apc { block: apc.block, machine: convert_machine_field_type(apc.machine, convert_field_element), subs: apc.subs, optimistic_constraints: apc.optimistic_constraints, } } /// Converts the field type of a symbolic machine. 
pub fn convert_machine_field_type( machine: SymbolicMachine, convert_field_element: &impl Fn(T) -> U, ) -> SymbolicMachine { SymbolicMachine { constraints: machine .constraints .into_iter() .map(|c| convert_symbolic_constraint(c, convert_field_element)) .collect(), bus_interactions: machine .bus_interactions .into_iter() .map(|i| convert_bus_interaction(i, convert_field_element)) .collect(), derived_columns: machine .derived_columns .into_iter() .map(|derived_variable| { let method = match derived_variable.computation_method { ComputationMethod::Constant(c) => { ComputationMethod::Constant(convert_field_element(c)) } ComputationMethod::QuotientOrZero(e1, e2) => ComputationMethod::QuotientOrZero( convert_expression(e1, convert_field_element), convert_expression(e2, convert_field_element), ), }; DerivedVariable::new(derived_variable.variable, method) }) .collect(), } } fn convert_symbolic_constraint( constraint: SymbolicConstraint, convert: &impl Fn(T) -> U, ) -> SymbolicConstraint { SymbolicConstraint { expr: convert_expression(constraint.expr, convert), } } fn convert_bus_interaction( constraint: SymbolicBusInteraction, convert: &impl Fn(T) -> U, ) -> SymbolicBusInteraction { SymbolicBusInteraction { id: constraint.id, mult: convert_expression(constraint.mult, convert), args: constraint .args .into_iter() .map(|e| convert_expression(e, convert)) .collect(), } } fn convert_expression( expr: AlgebraicExpression, convert: &impl Fn(T) -> U, ) -> AlgebraicExpression { match expr { AlgebraicExpression::Number(n) => AlgebraicExpression::Number(convert(n)), AlgebraicExpression::Reference(r) => AlgebraicExpression::Reference(r), AlgebraicExpression::BinaryOperation(algebraic_binary_operation) => { AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { op: algebraic_binary_operation.op, left: Box::new(convert_expression( *algebraic_binary_operation.left, convert, )), right: Box::new(convert_expression( *algebraic_binary_operation.right, convert, )), }) } 
AlgebraicExpression::UnaryOperation(algebraic_unary_operation) => { AlgebraicExpression::UnaryOperation(powdr_expression::AlgebraicUnaryOperation { op: algebraic_unary_operation.op, expr: Box::new(convert_expression(*algebraic_unary_operation.expr, convert)), }) } } } /// Converts a basic block into a symbolic machines (all instruction circuits /// concatenated) and a column allocator. pub(crate) fn statements_to_symbolic_machine( block: &SuperBlock, instruction_handler: &A::InstructionHandler, bus_map: &BusMap, ) -> (SymbolicMachine, ColumnAllocator) { let (machines, column_allocator) = statements_to_symbolic_machines::(block, instruction_handler, bus_map); let machine = machines .into_iter() .reduce(SymbolicMachine::concatenate) .unwrap(); (machine, column_allocator) } /// Converts a basic block into a list of symbolic machines (one per instruction) /// and a column allocator. All columns are globally unique across all instructions. pub(crate) fn statements_to_symbolic_machines( block: &SuperBlock, instruction_handler: &A::InstructionHandler, bus_map: &BusMap, ) -> (Vec>, ColumnAllocator) { let mut col_subs: Vec> = Vec::new(); let mut global_idx = 0; let mut machines: Vec> = Vec::new(); for (i, (pc, instr)) in block.instructions().enumerate() { let machine = instruction_handler .get_instruction_air_and_id(instr) .1 .clone(); let machine: SymbolicMachine<::PowdrField> = convert_machine_field_type(machine, &|x| A::from_field(x)); let pc_lookup_row = instr .pc_lookup_row(pc) .into_iter() .map(|x| A::from_field(x)) .collect::>(); let (next_global_idx, subs, machine) = powdr::globalize_references(machine, global_idx, i); global_idx = next_global_idx; // Make machine mutable, to add local constraints let mut machine = machine; let pc_lookup = machine .bus_interactions .iter() .filter(|bus_int| bus_int.id == bus_map.get_bus_id(&BusType::PcLookup).unwrap()) .exactly_one() .expect("Expected single pc lookup"); // To simplify constraint solving, we constrain `is_valid` to 
be 1, which effectively // removes the column. The optimized precompile will then have to be guarded by a new // `is_valid` column. let minus_is_valid: AlgebraicExpression<_> = exec_receive( &machine, bus_map.get_bus_id(&BusType::ExecutionBridge).unwrap(), ) .mult .clone(); let one = AlgebraicExpression::Number(1u64.into()); machine .constraints .push((minus_is_valid.clone() + one).into()); // Constrain the pc lookup to the current instruction. machine.constraints.extend( pc_lookup .args .iter() .zip_eq(pc_lookup_row) .map(|(l, r)| (l.clone() - r.into()).into()), ); col_subs.push(subs); machines.push(machine); } ( machines, ColumnAllocator { subs: col_subs, next_poly_id: global_idx, }, ) } fn exec_receive( machine: &SymbolicMachine, exec_bus_id: u64, ) -> SymbolicBusInteraction { let [r, _s] = machine .bus_interactions .iter() .filter(|bus_int| bus_int.id == exec_bus_id) .collect::>() .try_into() .unwrap(); // TODO assert that r.mult matches -expr r.clone() } ================================================ FILE: autoprecompiles/src/trace_handler.rs ================================================ use itertools::Itertools; use powdr_constraint_solver::constraint_system::DerivedVariable; use rayon::prelude::*; use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; use std::{cmp::Eq, hash::Hash}; use crate::blocks::PcStep; use crate::expression::{AlgebraicExpression, AlgebraicReference}; use crate::{Apc, InstructionHandler}; pub struct OriginalRowReference<'a, D> { pub data: &'a D, pub start: usize, pub length: usize, } pub struct TraceData<'a, F, D> { /// For each call of the apc, the values of each original instruction's dummy trace. pub dummy_values: Vec>>, /// The mapping from dummy trace index to APC index for each instruction. pub dummy_trace_index_to_apc_index_by_instruction: Vec>, /// The mapping from poly_id to the index in the list of apc columns. /// The values are always unique and contiguous. 
pub apc_poly_id_to_index: BTreeMap, /// Indices of columns to compute and the way to compute them /// (from other values). pub columns_to_compute: &'a [DerivedVariable>], } pub trait TraceTrait: Send + Sync { type Values: Send + Sync; fn width(&self) -> usize; fn values(&self) -> &Self::Values; } // TODO: refactor `Apc` so we don't have to pass A, V here pub fn generate_trace<'a, IH, M: TraceTrait, A, V>( air_id_to_dummy_trace: &'a HashMap, instruction_handler: &'a IH, apc_call_count: usize, apc: &'a Apc, ) -> TraceData<'a, IH::Field, M::Values> where IH: InstructionHandler, IH::Field: Display + Clone + Send + Sync, IH::AirId: Eq + Hash + Send + Sync, IH::Instruction: PcStep, { // Keep only instructions that produce dummy records let instructions_with_subs = apc .instructions() .zip_eq(apc.subs.iter()) .filter(|(_, subs)| !subs.is_empty()); let instructions_with_subs = instructions_with_subs.collect::>(); let original_instruction_air_ids = instructions_with_subs .iter() .map(|(instruction, _)| { instruction_handler .get_instruction_air_and_id(instruction) .0 }) .collect::>(); let air_id_occurrences = original_instruction_air_ids.iter().counts(); let apc_poly_id_to_index: BTreeMap = apc .machine .main_columns() .enumerate() .map(|(index, c)| (c.id, index)) .collect(); let original_instruction_table_offsets = original_instruction_air_ids .iter() .scan( HashMap::default(), |counts: &mut HashMap<&IH::AirId, usize>, air_id| { let count = counts.entry(air_id).or_default(); let current_count = *count; *count += 1; Some(current_count) }, ) .collect::>(); let dummy_trace_index_to_apc_index_by_instruction = instructions_with_subs .iter() .map(|(_, subs)| { subs.iter() .map(|substitution| { ( substitution.original_poly_index, apc_poly_id_to_index[&substitution.apc_poly_id], ) }) .collect_vec() }) .collect(); let dummy_values = (0..apc_call_count) .into_par_iter() .map(|trace_row| { original_instruction_air_ids .iter() .zip_eq(original_instruction_table_offsets.iter()) 
.map(|(air_id, dummy_table_offset)| { let trace = air_id_to_dummy_trace.get(air_id).unwrap(); let values = trace.values(); let width = trace.width(); let occurrences_per_record = air_id_occurrences.get(air_id).unwrap(); let start = (trace_row * occurrences_per_record + dummy_table_offset) * width; OriginalRowReference { data: values, start, length: width, } }) .collect_vec() }) .collect(); let columns_to_compute = &apc.machine.derived_columns; TraceData { dummy_values, dummy_trace_index_to_apc_index_by_instruction, apc_poly_id_to_index, columns_to_compute, } } ================================================ FILE: autoprecompiles/tests/optimizer.rs ================================================ use expect_test::expect; use itertools::Itertools; use powdr_autoprecompiles::bus_map::BusMap; use powdr_autoprecompiles::export::{ApcWithBusMap, SimpleInstruction}; use powdr_autoprecompiles::optimizer::optimize; use powdr_autoprecompiles::symbolic_machine::SymbolicMachine; use powdr_autoprecompiles::{Apc, ColumnAllocator, DegreeBound}; use powdr_number::BabyBearField; use powdr_openvm_bus_interaction_handler::memory_bus_interaction::OpenVmMemoryBusInteraction; use powdr_openvm_bus_interaction_handler::{ bus_map::{default_openvm_bus_map, OpenVmBusType}, OpenVmBusInteractionHandler, }; use test_log::test; const DEFAULT_DEGREE_BOUND: DegreeBound = DegreeBound { identities: 3, bus_interactions: 2, }; type TestApc = Apc, (), ()>; fn import_apc_from_gzipped_json(file: &str) -> ApcWithBusMap> { let file = std::fs::File::open(file).unwrap(); let reader = flate2::read::GzDecoder::new(file); serde_json::from_reader(reader).unwrap() } #[test] fn load_machine_json() { let apc = import_apc_from_gzipped_json("tests/keccak_apc_pre_opt.json.gz"); let machine: SymbolicMachine = apc.apc.machine; assert!(machine.derived_columns.is_empty()); expect![[r#" 27521 "#]] .assert_debug_eq(&machine.main_columns().count()); expect![[r#" 13262 "#]] .assert_debug_eq(&machine.bus_interactions.len()); 
expect![[r#" 28627 "#]] .assert_debug_eq(&machine.constraints.len()); } #[test] fn test_optimize() { let apc = import_apc_from_gzipped_json("tests/keccak_apc_pre_opt.json.gz"); let machine: SymbolicMachine = apc.apc.machine; assert!(machine.derived_columns.is_empty()); let column_allocator = ColumnAllocator::from_max_poly_id_of_machine(&machine); let machine = optimize::<_, _, _, OpenVmMemoryBusInteraction<_, _>>( machine, OpenVmBusInteractionHandler::default(), DEFAULT_DEGREE_BOUND, &apc.bus_map, column_allocator, &mut Default::default(), ) .unwrap() .0; // This cbor file above has the `is_valid` column removed, this is why the number below // might be one less than in other tests. expect![[r#" 2021 "#]] .assert_debug_eq(&machine.main_columns().count()); expect![[r#" 1734 "#]] .assert_debug_eq(&machine.bus_interactions.len()); expect![[r#" 186 "#]] .assert_debug_eq(&machine.constraints.len()); } #[test] fn test_ecrecover() { let apc = import_apc_from_gzipped_json("tests/ecrecover_apc_pre_opt.json.gz"); let machine: SymbolicMachine = apc.apc.machine; assert!(machine.derived_columns.is_empty()); let column_allocator = ColumnAllocator::from_max_poly_id_of_machine(&machine); let machine = optimize::<_, _, _, OpenVmMemoryBusInteraction<_, _>>( machine, OpenVmBusInteractionHandler::default(), DEFAULT_DEGREE_BOUND, &default_openvm_bus_map(), column_allocator, &mut Default::default(), ) .unwrap() .0; // This cbor file above has the `is_valid` column removed, this is why the number below // might be one less than in other tests. 
expect![[r#" 3730 "#]] .assert_debug_eq(&machine.main_columns().count()); expect![[r#" 2314 "#]] .assert_debug_eq(&machine.bus_interactions.len()); expect![[r#" 3114 "#]] .assert_debug_eq(&machine.constraints.len()); } #[test] fn test_sha256() { let apc = import_apc_from_gzipped_json("tests/sha256_apc_pre_opt.json.gz"); let machine: SymbolicMachine = apc.apc.machine; assert!(machine.derived_columns.is_empty()); let column_allocator = ColumnAllocator::from_max_poly_id_of_machine(&machine); let machine = optimize::<_, _, _, OpenVmMemoryBusInteraction<_, _>>( machine, OpenVmBusInteractionHandler::default(), DEFAULT_DEGREE_BOUND, &default_openvm_bus_map(), column_allocator, &mut Default::default(), ) .unwrap() .0; // This cbor file above has the `is_valid` column removed, this is why the number below // might be one less than in other tests. expect![[r#" 12034 "#]] .assert_debug_eq(&machine.main_columns().count()); expect![[r#" 9539 "#]] .assert_debug_eq(&machine.bus_interactions.len()); expect![[r#" 3770 "#]] .assert_debug_eq(&machine.constraints.len()); } #[test] fn test_single_div_nondet() { let apc = import_apc_from_gzipped_json("tests/single_div_nondet.json.gz"); let machine: SymbolicMachine = apc.apc.machine; assert!(machine.derived_columns.is_empty()); let column_allocator = ColumnAllocator::from_max_poly_id_of_machine(&machine); let machine = optimize::<_, _, _, OpenVmMemoryBusInteraction<_, _>>( machine, OpenVmBusInteractionHandler::default(), DEFAULT_DEGREE_BOUND, &default_openvm_bus_map(), column_allocator, &mut Default::default(), ) .unwrap() .0; let algebraic_constraints_with_zero = machine .constraints .iter() .map(|c| c.to_string()) .filter(|s| s.contains("zero")) .join("\n"); expect![[r#" (zero_divisor_0 + r_zero_0) * (zero_divisor_0 + r_zero_0 - 1) zero_divisor_0 * (zero_divisor_0 - 1) zero_divisor_0 * (q__0_0 - 255) zero_divisor_0 * (q__1_0 - 255) zero_divisor_0 * (q__2_0 - 255) zero_divisor_0 * (q__3_0 - 255) (1 - zero_divisor_0) * ((c__0_0 + c__1_0 
+ c__2_0 + c__3_0) * c_sum_inv_0 - 1) r_zero_0 * (r_zero_0 - 1) (1 - (zero_divisor_0 + r_zero_0)) * ((r__0_0 + r__1_0 + r__2_0 + r__3_0) * r_sum_inv_0 - 1) (q__0_0 + q__1_0 + q__2_0 + q__3_0) * ((1 - zero_divisor_0) * (q_sign_0 - sign_xor_0)) (q_sign_0 - sign_xor_0) * ((1 - zero_divisor_0) * q_sign_0) (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (lt_diff_0 - (r_prime__3_0 * (2 * c_sign_0 - 1) + c__3_0 * (1 - 2 * c_sign_0))) zero_divisor_0 * (c__0_0 + c__1_0 + c__2_0 + c__3_0) r_zero_0 * (r__0_0 + r__1_0 + r__2_0 + r__3_0)"#]] .assert_eq(&algebraic_constraints_with_zero); expect![[r#" 47 "#]] .assert_debug_eq(&machine.main_columns().count()); expect![[r#" 24 "#]] .assert_debug_eq(&machine.bus_interactions.len()); expect![[r#" 44 "#]] .assert_debug_eq(&machine.constraints.len()); } #[test] fn test_optimize_reth_op() { let apc = import_apc_from_gzipped_json("tests/apc_reth_op_bug.json.gz"); let machine: SymbolicMachine = apc.apc.machine; assert!(machine.derived_columns.is_empty()); let bus_map = &apc.bus_map; let bus_int_handler = OpenVmBusInteractionHandler::new(bus_map.clone()); let column_allocator = ColumnAllocator::from_max_poly_id_of_machine(&machine); let machine = optimize::<_, _, _, OpenVmMemoryBusInteraction<_, _>>( machine, bus_int_handler, DEFAULT_DEGREE_BOUND, bus_map, column_allocator, &mut Default::default(), ) .unwrap() .0; expect![[r#" 446 "#]] .assert_debug_eq(&machine.main_columns().count()); expect![[r#" 356 "#]] .assert_debug_eq(&machine.bus_interactions.len()); expect![[r#" 313 "#]] .assert_debug_eq(&machine.constraints.len()); } ================================================ FILE: cli-openvm-riscv/Cargo.toml ================================================ [package] name = "cli-openvm-riscv" version.workspace = true edition.workspace = 
true license.workspace = true homepage.workspace = true repository.workspace = true [features] default = ["metrics"] metrics = ["powdr-openvm/metrics", "openvm-sdk/metrics", "openvm-stark-backend/metrics", "openvm-stark-sdk/metrics"] [[bin]] name = "powdr_openvm_riscv" path = "src/main.rs" bench = false # See https://github.com/bheisler/criterion.rs/issues/458 [dependencies] openvm-sdk.workspace = true openvm-stark-sdk.workspace = true openvm-stark-backend.workspace = true powdr-autoprecompiles.workspace = true powdr-openvm.workspace = true powdr-openvm-riscv.workspace = true eyre.workspace = true clap = { version = "^4.3", features = ["derive"] } serde_cbor.workspace = true tracing.workspace = true tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] } tracing-forest = "0.1" metrics.workspace = true metrics-tracing-context = "0.16.0" metrics-util = "0.17.0" serde_json.workspace = true [lints] workspace = true ================================================ FILE: cli-openvm-riscv/README.md ================================================ # cli-openvm Use command `execute` to run the program only, and `prove` to prove. The `prove` command has a `mock` option to only check the constraints. 
Examples: ```sh # Run the original program RUSTFLAGS='-C target-cpu=native' cargo run -r execute guest # Prove the original program RUSTFLAGS='-C target-cpu=native' cargo run -r prove guest # Check the constraints and witness of the original program RUSTFLAGS='-C target-cpu=native' cargo run -r prove guest --mock # Run the program with autoprecompiles RUSTFLAGS='-C target-cpu=native' cargo run -r execute guest --skip 37 --autoprecompiles 1 # Run the program with optimized autoprecompiles RUSTFLAGS='-C target-cpu=native' cargo run -r execute guest --skip 37 --autoprecompiles 1 --optimize # Prove the program with autoprecompiles RUSTFLAGS='-C target-cpu=native' cargo run -r prove guest --skip 37 --autoprecompiles 1 # Prove the program with optimized autoprecompiles RUSTFLAGS='-C target-cpu=native' cargo run -r prove guest --skip 37 --autoprecompiles 1 --optimize # Check the constraints and witness of the program with autoprecompiles RUSTFLAGS='-C target-cpu=native' cargo run -r prove guest --skip 37 --autoprecompiles 1 --mock # Check the constraints and witness of the program with optimized autoprecompiles RUSTFLAGS='-C target-cpu=native' cargo run -r prove guest --skip 37 --autoprecompiles 1 --mock --optimize ``` It is recommended to use at least `RUST_LOG=info` for information, and `RUST_LOG=debug` for benchmarks. 
================================================ FILE: cli-openvm-riscv/src/main.rs ================================================ use eyre::Result; use metrics_tracing_context::{MetricsLayer, TracingContextLayer}; use metrics_util::{debugging::DebuggingRecorder, layers::Layer}; use openvm_sdk::StdIn; use openvm_stark_sdk::bench::serialize_metric_snapshot; use powdr_autoprecompiles::empirical_constraints::EmpiricalConstraints; use powdr_autoprecompiles::pgo::{pgo_config, PgoType}; use powdr_autoprecompiles::PowdrConfig; use powdr_openvm_riscv::{ compile_openvm, detect_empirical_constraints, CompiledProgram, GuestOptions, OriginalCompiledProgram, RiscvISA, }; #[cfg(feature = "metrics")] use openvm_stark_sdk::metrics_tracing::TimingMetricsLayer; use clap::{Args, CommandFactory, Parser, Subcommand}; use powdr_openvm::default_powdr_openvm_config; use std::{io, path::PathBuf}; use tracing::Level; use tracing_forest::ForestLayer; use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry}; #[derive(Parser)] #[command(name = "powdr-openvm", author, version, about, long_about = None)] struct Cli { #[command(subcommand)] command: Option, } #[derive(Args)] struct SharedArgs { #[arg(long, default_value_t = 0)] autoprecompiles: usize, #[arg(long, default_value_t = 0)] skip: usize, #[arg(long)] input: Option, #[arg(long, default_value_t = PgoType::default())] pgo: PgoType, /// When `--pgo-mode cell`, the optional max columns #[clap(long)] max_columns: Option, /// When `--pgo-mode cell`, the directory to persist all APC candidates + a metrics summary #[arg(long)] apc_candidates_dir: Option, /// Maximum number of instructions in an APC #[arg(long)] apc_max_instructions: Option, /// Ignore APCs executed less times than the cutoff #[arg(long)] apc_exec_count_cutoff: Option, /// If active, generates "optimistic" precompiles. Optimistic precompiles are smaller in size /// but may fail at runtime if the assumptions they make are violated. 
#[arg(long)] #[arg(default_value_t = false)] optimistic_precompiles: bool, /// When larger than 1, enables superblocks with up to the given number of basic blocks. #[arg(long, default_value_t = 1, value_parser = clap::value_parser!(u8).range(1..))] superblocks: u8, } #[derive(Subcommand)] enum Commands { Compile { guest: String, #[command(flatten)] shared: SharedArgs, }, Execute { guest: String, #[command(flatten)] shared: SharedArgs, #[arg(long)] metrics: Option, }, Prove { guest: String, #[command(flatten)] shared: SharedArgs, #[arg(long)] #[arg(default_value_t = false)] mock: bool, #[arg(long)] #[arg(default_value_t = false)] recursion: bool, #[arg(long)] metrics: Option, }, } fn main() -> Result<(), io::Error> { let args = Cli::parse(); setup_tracing_with_log_level(Level::INFO); if let Some(command) = args.command { run_command(command); Ok(()) } else { Cli::command().print_help() } } fn build_powdr_config(shared: &SharedArgs) -> PowdrConfig { let mut powdr_config = default_powdr_openvm_config(shared.autoprecompiles as u64, shared.skip as u64); if let Some(apc_candidates_dir) = &shared.apc_candidates_dir { powdr_config = powdr_config.with_apc_candidates_dir(apc_candidates_dir); } powdr_config .with_optimistic_precompiles(shared.optimistic_precompiles) .with_superblocks( shared.superblocks, shared.apc_max_instructions, shared.apc_exec_count_cutoff, ) } fn run_command(command: Commands) { let guest_opts = GuestOptions::default(); match command { Commands::Compile { guest, shared } => { validate_shared_args(&shared); let powdr_config = build_powdr_config(&shared); let guest_program = compile_openvm(&guest, guest_opts.clone()).unwrap(); let execution_profile = powdr_openvm::execution_profile_from_guest( &guest_program, stdin_from(shared.input), ); let empirical_constraints = maybe_compute_empirical_constraints( &guest_program, &powdr_config, stdin_from(shared.input), ); let pgo_config = pgo_config(shared.pgo, shared.max_columns, execution_profile); let program = 
powdr_openvm_riscv::compile_exe( guest_program, powdr_config, pgo_config, empirical_constraints, ) .unwrap(); write_program_to_file(program, &format!("{guest}_compiled.cbor")).unwrap(); } Commands::Execute { guest, shared, metrics, } => { validate_shared_args(&shared); if shared.superblocks > 1 { Cli::command() .error( clap::error::ErrorKind::ArgumentConflict, "OpenVM execution with superblocks not yet supported.", ) .exit(); } let powdr_config = build_powdr_config(&shared); let guest_program = compile_openvm(&guest, guest_opts.clone()).unwrap(); let empirical_constraints = maybe_compute_empirical_constraints( &guest_program, &powdr_config, stdin_from(shared.input), ); let execution_profile = powdr_openvm::execution_profile_from_guest( &guest_program, stdin_from(shared.input), ); let pgo_config = pgo_config(shared.pgo, shared.max_columns, execution_profile); let compile_and_exec = || { let program = powdr_openvm_riscv::compile_exe( guest_program, powdr_config, pgo_config, empirical_constraints, ) .unwrap(); powdr_openvm::execute(program, stdin_from(shared.input)).unwrap(); }; if let Some(metrics_path) = metrics { run_with_metric_collection_to_file( std::fs::File::create(metrics_path).expect("Failed to create metrics file"), compile_and_exec, ); } else { compile_and_exec() } } Commands::Prove { guest, shared, mock, recursion, metrics, } => { validate_shared_args(&shared); if shared.superblocks > 1 { Cli::command() .error( clap::error::ErrorKind::ArgumentConflict, "OpenVM execution with superblocks not yet supported.", ) .exit(); } let powdr_config = build_powdr_config(&shared); let guest_program = compile_openvm(&guest, guest_opts).unwrap(); let empirical_constraints = maybe_compute_empirical_constraints( &guest_program, &powdr_config, stdin_from(shared.input), ); let execution_profile = powdr_openvm::execution_profile_from_guest( &guest_program, stdin_from(shared.input), ); let pgo_config = pgo_config(shared.pgo, shared.max_columns, execution_profile); let 
compile_and_prove = || { let program = powdr_openvm_riscv::compile_exe( guest_program, powdr_config, pgo_config, empirical_constraints, ) .unwrap(); powdr_openvm_riscv::prove(&program, mock, recursion, stdin_from(shared.input), None) .unwrap() }; if let Some(metrics_path) = metrics { run_with_metric_collection_to_file( std::fs::File::create(metrics_path).expect("Failed to create metrics file"), compile_and_prove, ); } else { compile_and_prove() } } } } fn write_program_to_file( program: CompiledProgram, filename: &str, ) -> Result<(), io::Error> { use std::fs::File; let mut file = File::create(filename)?; serde_cbor::to_writer(&mut file, &program).map_err(io::Error::other)?; Ok(()) } fn validate_shared_args(args: &SharedArgs) { if args.superblocks > 1 && !matches!(args.pgo, PgoType::Cell) { Cli::command() .error( clap::error::ErrorKind::ArgumentConflict, "superblocks are only supported with `--pgo cell`", ) .exit(); } } fn stdin_from(input: Option) -> StdIn { let mut s = StdIn::default(); if let Some(i) = input { s.write(&i) } s } fn setup_tracing_with_log_level(level: Level) { let env_filter = EnvFilter::try_from_default_env() .unwrap_or_else(|_| EnvFilter::new(format!("{level},p3_=warn"))); let subscriber = Registry::default() .with(env_filter) .with(ForestLayer::default()) .with(MetricsLayer::new()); #[cfg(feature = "metrics")] let subscriber = subscriber.with(TimingMetricsLayer::new()); tracing::subscriber::set_global_default(subscriber).unwrap(); } /// export stark-backend metrics to the given file pub fn run_with_metric_collection_to_file(file: std::fs::File, f: impl FnOnce() -> R) -> R { let recorder = DebuggingRecorder::new(); let snapshotter = recorder.snapshotter(); let recorder = TracingContextLayer::all().layer(recorder); metrics::set_global_recorder(recorder).unwrap(); let res = f(); serde_json::to_writer_pretty(&file, &serialize_metric_snapshot(snapshotter.snapshot())) .unwrap(); res } /// If optimistic precompiles are enabled, compute empirical 
constraints from the execution /// of the guest program on the given stdin, and save them to disk. fn maybe_compute_empirical_constraints( guest_program: &OriginalCompiledProgram, powdr_config: &PowdrConfig, stdin: StdIn, ) -> EmpiricalConstraints { if !powdr_config.should_use_optimistic_precompiles { return EmpiricalConstraints::default(); } tracing::warn!( "Optimistic precompiles are not implemented yet. Computing empirical constraints..." ); let empirical_constraints = detect_empirical_constraints(guest_program, powdr_config.degree_bound, vec![stdin]); if let Some(path) = &powdr_config.apc_candidates_dir_path { std::fs::create_dir_all(path).expect("Failed to create apc candidates directory"); tracing::info!( "Saving empirical constraints debug info to {}/empirical_constraints.json", path.display() ); let json = serde_json::to_string_pretty(&empirical_constraints).unwrap(); std::fs::write(path.join("empirical_constraints.json"), json).unwrap(); } empirical_constraints } ================================================ FILE: constraint-solver/Cargo.toml ================================================ [package] name = "powdr-constraint-solver" description = "powdr tools to analyze and solve algebraic constraints" version = { workspace = true } edition = { workspace = true } license = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [dependencies] powdr-number.workspace = true itertools.workspace = true num-traits.workspace = true derive_more.workspace = true auto_enums = "0.8.5" log.workspace = true bitvec = "1.0.1" serde.workspace = true crepe = { git = "https://github.com/powdr-labs/crepe", rev = "powdr-0.1.11" } derivative.workspace = true [dev-dependencies] pretty_assertions.workspace = true env_logger.workspace = true test-log.workspace = true expect-test = "1.5.1" serde_json.workspace = true [package.metadata.cargo-udeps.ignore] development = ["env_logger"] [lints] workspace = true [lib] bench = false # See 
https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: constraint-solver/src/algebraic_constraint/mod.rs ================================================ use std::fmt::Display; use crate::{ grouped_expression::GroupedExpression, runtime_constant::{RuntimeConstant, Substitutable}, }; use num_traits::{One, Zero}; use serde::Serialize; pub mod solve; /// An algebraic constraint #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Serialize)] #[serde(transparent)] pub struct AlgebraicConstraint { /// The expression representing the constraint, which must evaluate to 0 for the constraint to be satisfied. pub expression: V, } // We implement `From` to make writing tests easier. However, we recommend using `AlgebraicConstraint::assert_zero` for clarity impl From for AlgebraicConstraint { fn from(expression: V) -> Self { AlgebraicConstraint::assert_zero(expression) } } impl AlgebraicConstraint { /// Create a constraint which asserts that the expression evaluates to 0. pub fn assert_zero(expression: V) -> Self { AlgebraicConstraint { expression } } /// Returns a constraint over a reference to the expression. This is useful to interact with the solver. pub fn as_ref(&self) -> AlgebraicConstraint<&V> { AlgebraicConstraint { expression: &self.expression, } } } impl AlgebraicConstraint<&V> { pub(crate) fn cloned(&self) -> AlgebraicConstraint { AlgebraicConstraint { expression: self.expression.clone(), } } } impl AlgebraicConstraint> { /// Returns a constraint which asserts that the two expressions are equal. pub fn assert_eq(expression: GroupedExpression, other: GroupedExpression) -> Self { Self::assert_zero(expression - other) } /// Returns a constraint which asserts that the expression is a boolean. 
pub fn assert_bool(expression: GroupedExpression) -> Self { Self::assert_zero(expression.clone() * (expression - GroupedExpression::one())) } } impl AlgebraicConstraint { pub fn is_redundant(&self) -> bool { self.expression.is_zero() } } impl, V: Clone + Eq + Ord> AlgebraicConstraint> { /// Substitute a variable by a symbolically known expression. The variable can be known or unknown. /// If it was already known, it will be substituted in the known expressions. pub fn substitute_by_known(&mut self, variable: &V, substitution: &T) { self.expression.substitute_by_known(variable, substitution); } pub fn degree(&self) -> usize { self.expression.degree() } } impl Display for AlgebraicConstraint { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{} = 0", self.expression) } } impl AlgebraicConstraint> { /// Returns the referenced unknown variables. Might contain repetitions. pub fn referenced_unknown_variables(&self) -> Box + '_> { self.expression.referenced_unknown_variables() } } impl AlgebraicConstraint<&GroupedExpression> { /// Returns the referenced unknown variables. Might contain repetitions. 
pub fn referenced_unknown_variables(&self) -> Box + '_> { self.expression.referenced_unknown_variables() } } ================================================ FILE: constraint-solver/src/algebraic_constraint/solve.rs ================================================ use std::{collections::HashSet, fmt::Display, hash::Hash}; use itertools::Itertools; use num_traits::Zero; use powdr_number::FieldElement; use crate::{ algebraic_constraint::AlgebraicConstraint, effect::{Assertion, Condition, Effect}, grouped_expression::{GroupedExpression, RangeConstraintProvider}, range_constraint::RangeConstraint, runtime_constant::RuntimeConstant, }; #[derive(Default)] pub struct ProcessResult { pub effects: Vec>, pub complete: bool, } impl ProcessResult { pub fn empty() -> Self { Self { effects: vec![], complete: false, } } pub fn complete(effects: Vec>) -> Self { Self { effects, complete: true, } } } #[derive(Debug, PartialEq, Eq)] pub enum Error { /// The range constraints of the parts do not cover the full constant sum. ConflictingRangeConstraints, /// An equality constraint evaluates to a known-nonzero value. ConstraintUnsatisfiable(String), } impl AlgebraicConstraint<&GroupedExpression> where T: FieldElement, V: Ord + Clone + Eq + Hash + Display, { /// Solves the equation `self = 0` and returns how to compute the solution. /// The solution can contain assignments to multiple variables. /// If no way to solve the equation (and no way to derive new range /// constraints) has been found, but it still contains /// unknown variables, returns an empty, incomplete result. /// If the equation is known to be unsolvable, returns an error. 
pub fn solve( &self, range_constraints: &impl RangeConstraintProvider, ) -> Result, Error> { let expression = self.expression; if !expression .range_constraint(range_constraints) .allows_value(Zero::zero()) { return Err(Error::ConstraintUnsatisfiable(self.to_string())); } if expression.is_quadratic() { self.solve_quadratic(range_constraints) } else if let Some(k) = expression.try_to_known() { // If we know `expression` to be nonzero, we should have returned // Err already in the range constraint check above. assert!(k.is_zero()); // TODO we could still process more information // and reach "unsatisfiable" here. Ok(ProcessResult::complete(vec![])) } else { self.solve_affine(range_constraints) } } /// Solves the constraint for `variable`. This is only possible if /// `variable` does not appear in the quadratic component and /// has a coefficient which is known to be not zero. /// /// If the constraint has the form `A + k * x = 0` where `A` does not /// contain the variable `x` and `k` is a non-zero runtime constant, /// it returns `A * (-k^(-1))`. /// /// Returns the resulting solved grouped expression. pub fn try_solve_for(&self, variable: &V) -> Option> { let coefficient = self .expression .coefficient_of_variable_in_affine_part(variable)?; assert!(!coefficient.is_zero()); let subtracted = self.expression.clone() - GroupedExpression::from_unknown_variable(variable.clone()) * *coefficient; if subtracted.referenced_unknown_variables().contains(variable) { // There is another occurrence of the variable in the quadratic component, // we cannot solve for it. return None; } Some(subtracted * (-coefficient.field_inverse())) } /// Algebraically transforms the constraint such that `self = 0` is equivalent /// to `expr = result` and returns `result`. /// /// Returns `None` if it cannot solve (this happens for example if self is quadratic). /// Panics if `expr` is quadratic. 
pub fn try_solve_for_expr( &self, expr: &GroupedExpression, ) -> Option> { let expression = self.expression; assert!( expr.is_affine(), "Tried to solve for quadratic expression {expr}" ); if expression.is_quadratic() { return None; } // Find a normalization factor by iterating over the variables. let normalization_factor = expr .referenced_unknown_variables() .find_map(|var| { let coeff = expression.coefficient_of_variable_in_affine_part(var)?; // We can only divide if we know the coefficient is non-zero. if coeff.is_known_nonzero() { Some( expr.coefficient_of_variable_in_affine_part(var) .unwrap() .field_div(coeff), ) } else { None } }) .unwrap_or(T::one()); let result = expr - &(self.expression.clone() * normalization_factor); // Check that the operations removed all variables in `expr` from `self`. if !expr .referenced_unknown_variables() .collect::>() .is_disjoint( &result .referenced_unknown_variables() .collect::>(), ) { // The variables did not fully cancel out return None; } Some(result) } fn solve_affine( &self, range_constraints: &impl RangeConstraintProvider, ) -> Result, Error> { Ok( if let Ok((var, coeff)) = self.expression.linear_components().exactly_one() { // Solve "coeff * X + self.constant = 0" by division. assert!( !coeff.is_known_zero(), "Zero coefficient has not been removed: {self}" ); let constant = self.expression.constant_offset(); if coeff.is_known_nonzero() { // In this case, we can always compute a solution. let value = constant.field_div(&-*coeff); ProcessResult::complete(vec![assignment_if_satisfies_range_constraints( var.clone(), value, range_constraints, )?]) } else if constant.is_known_nonzero() { // If the offset is not zero, then the coefficient must be non-zero, // otherwise the constraint is violated. 
let value = constant.field_div(&-*coeff); ProcessResult::complete(vec![ Assertion::assert_is_nonzero(*coeff), assignment_if_satisfies_range_constraints( var.clone(), value, range_constraints, )?, ]) } else { // In this case, we could have an equation of the form // 0 * X = 0, which is valid and generates no information about X. ProcessResult::empty() } } else { ProcessResult { effects: self.transfer_constraints(range_constraints), complete: false, } }, ) } /// Extract the range constraints from the expression. /// Assumptions: /// - The expression is linear fn transfer_constraints( &self, range_constraints: &impl RangeConstraintProvider, ) -> Vec> { // Solve for each of the variables in the linear component and // compute the range constraints. assert!(!self.expression.is_quadratic()); self.expression .linear_components() .filter_map(|(var, _)| { let rc = self.try_solve_for(var)?.range_constraint(range_constraints); Some((var, rc)) }) .filter(|(_, constraint)| !constraint.is_unconstrained()) .map(|(var, constraint)| Effect::RangeConstraint(var.clone(), constraint)) .collect() } fn solve_quadratic( &self, range_constraints: &impl RangeConstraintProvider, ) -> Result, Error> { let expression = self.expression; let Some((left, right)) = expression.try_as_single_product() else { return Ok(ProcessResult::empty()); }; // Now we have `left * right = 0`, i.e. one (or both) of them has to be zero. let (left_solution, right_solution) = match ( AlgebraicConstraint::assert_zero(left).solve(range_constraints), AlgebraicConstraint::assert_zero(right).solve(range_constraints), ) { // If one of them is always unsatisfiable, it is equivalent to just solving the other one for zero. (Err(_), o) | (o, Err(_)) => { return o; } (Ok(left), Ok(right)) => (left, right), }; if let Some(result) = combine_to_conditional_assignment(&left_solution, &right_solution, range_constraints) { return Ok(result); } // Now at least combine new range constraints on the same variable. 
// TODO: This will correctly find a bit range constraint on // `(X - 1) * X = 0`, but it fails to detect the case of // `X * X - X`. // This could be fixed by finding a canonical form for the quadratic // expression, and normalizing the constants. Ok(combine_range_constraints(&left_solution, &right_solution)) } } /// Tries to combine two process results from alternative branches into a /// conditional assignment. fn combine_to_conditional_assignment( left: &ProcessResult, right: &ProcessResult, range_constraints: &impl RangeConstraintProvider, ) -> Option> { let [Effect::Assignment(first_var, first_assignment)] = left.effects.as_slice() else { return None; }; let [Effect::Assignment(second_var, second_assignment)] = right.effects.as_slice() else { return None; }; if first_var != second_var { return None; } // At this point, we have two assignments to the same variable, i.e. // "`X = A` or `X = B`". If the two alternatives can never be satisfied at // the same time (i.e. the "or" is exclusive), we can turn this into a // conditional assignment. let diff = *first_assignment + -*second_assignment; // Now if `rc + diff` is disjoint from `rc`, it means // that if the value that `A` evaluates to falls into the allowed range for `X`, // then `B = A + diff` is not a possible value for `X` and vice-versa. // This means the two alternatives are disjoint and we can use a conditional assignment. let rc = range_constraints.get(first_var); if !rc .combine_sum(&RangeConstraint::from_value(diff)) .is_disjoint(&rc) { return None; } Some(ProcessResult { effects: vec![Effect::ConditionalAssignment { variable: first_var.clone(), condition: Condition { value: *first_assignment, condition: rc, }, in_range_value: *first_assignment, out_of_range_value: *second_assignment, }], complete: left.complete && right.complete, }) } /// Turns an effect into a range constraint on a variable. 
fn effect_to_range_constraint( effect: &Effect, ) -> Option<(V, RangeConstraint)> { match effect { Effect::RangeConstraint(var, rc) => Some((var.clone(), *rc)), Effect::Assignment(var, value) => Some((var.clone(), value.range_constraint())), _ => None, } } /// Tries to combine range constraint results from two alternative branches. /// In some cases, if both branches produce a complete range constraint for the same variable, /// and those range constraints can be combined without loss, the result is complete as well. fn combine_range_constraints( left: &ProcessResult, right: &ProcessResult, ) -> ProcessResult { let left_constraints = left .effects .iter() .filter_map(|e| effect_to_range_constraint(e)) .into_grouping_map() .reduce(|rc1, _, rc2| rc1.conjunction(&rc2)); let right_constraints = right .effects .iter() .filter_map(|e| effect_to_range_constraint(e)) .into_grouping_map() .reduce(|rc1, _, rc2| rc1.conjunction(&rc2)); let effects = left_constraints .iter() .filter_map(|(v, rc1)| { let rc2 = right_constraints.get(v)?; let rc = rc1.disjunction(rc2); // This does not capture all cases where the disjunction does not lose information, // but we want this to be an indicator of whether we can remove the original // constraint, and thus we want it to only hit the "single value" case. let complete = rc1.try_to_single_value().is_some() && rc2.try_to_single_value().is_some() && rc.size_estimate() <= 2.into(); Some((v, rc, complete)) }) .collect_vec(); // The completeness is tricky, but if there is just a single left effect // and a single right effect and the final range constraint is complete, // it means that both branches have a concrete assignment for the variable // and thus the range constraint is exactly what the original constraint captures. 
let complete = left.effects.len() == 1 && right.effects.len() == 1 && effects.len() == 1 && effects.iter().all(|(_, _, complete)| *complete); ProcessResult { effects: effects .into_iter() .map(|(v, rc, _)| Effect::RangeConstraint(v.clone(), rc)) .collect(), complete, } } fn assignment_if_satisfies_range_constraints( var: V, value: T, range_constraints: &impl RangeConstraintProvider, ) -> Result, Error> { let rc = range_constraints.get(&var); if rc.is_disjoint(&value.range_constraint()) { return Err(Error::ConflictingRangeConstraints); } Ok(Effect::Assignment(var, value)) } #[cfg(test)] mod tests { use std::collections::HashMap; use crate::grouped_expression::NoRangeConstraints; use super::*; use powdr_number::GoldilocksField; use pretty_assertions::assert_eq; type Qse = GroupedExpression; fn var(name: &'static str) -> Qse { Qse::from_unknown_variable(name) } fn constant(value: u64) -> Qse { Qse::from_number(GoldilocksField::from(value)) } #[test] fn unsolvable() { let r = AlgebraicConstraint::assert_zero(&Qse::from_number(GoldilocksField::from(10))) .solve(&NoRangeConstraints); assert!(r.is_err()); } #[test] fn solvable_without_vars() { let constr = constant(0); let result = AlgebraicConstraint::assert_zero(&constr) .solve(&NoRangeConstraints) .unwrap(); assert!(result.complete && result.effects.is_empty()); } #[test] fn solve_simple_eq() { let y = Qse::from_unknown_variable("y"); let x = Qse::from_unknown_variable("X"); // 2 * X + 7 * y - 10 = 0 let two = constant(2); let seven = constant(7); let ten = constant(10); let mut constr = two * x + seven * y - ten; constr.substitute_by_known(&"y", &GoldilocksField::from(13)); let result = AlgebraicConstraint::assert_zero(&constr) .solve(&NoRangeConstraints) .unwrap(); assert!(result.complete); assert_eq!(result.effects.len(), 1); let Effect::Assignment(var, expr) = &result.effects[0] else { panic!("Expected assignment"); }; assert_eq!(var.to_string(), "X"); assert_eq!( expr.to_string(), ((GoldilocksField::from(7) * 
GoldilocksField::from(13) - GoldilocksField::from(10)) / GoldilocksField::from(-2)) .to_string() ); } #[test] fn solve_constraint_transfer() { let rc = RangeConstraint::from_mask(0xffu32); let a = Qse::from_unknown_variable("a"); let b = Qse::from_unknown_variable("b"); let c = Qse::from_unknown_variable("c"); let z = Qse::from_unknown_variable("Z"); let range_constraints = HashMap::from([("a", rc), ("b", rc), ("c", rc)]); // a * 0x100 + b * 0x10000 + c * 0x1000000 + 10 - Z = 0 let ten = constant(10); let constr = a * constant(0x100) + b * constant(0x10000) + c * constant(0x1000000) + ten.clone() - z.clone(); let result = AlgebraicConstraint::assert_zero(&constr) .solve(&range_constraints) .unwrap(); assert!(!result.complete); let effects = result .effects .into_iter() .map(|effect| match effect { Effect::RangeConstraint(v, rc) => format!("{v}: {rc};\n"), _ => panic!(), }) .format("") .to_string(); // It appears twice because we solve the positive and the negated equation. // Note that the negated version has a different bit mask. assert_eq!( effects, "Z: [10, 4294967050] & 0xffffff0a; " ); } fn unpack_range_constraint( process_result: &ProcessResult, ) -> (&'static str, RangeConstraint) { let [effect] = &process_result.effects[..] else { panic!(); }; let Effect::RangeConstraint(var, rc) = effect else { panic!(); }; (var, *rc) } #[test] fn detect_bit_constraint() { let a = Qse::from_unknown_variable("a"); let one = constant(1); let three = constant(3); let five = constant(5); // All these constraints should be equivalent to a bit constraint. 
let constraints = [ a.clone() * (a.clone() - one.clone()), (a.clone() - one.clone()) * a.clone(), (three * a.clone()) * (five.clone() * a.clone() - five), ]; for constraint in constraints { let result = AlgebraicConstraint::assert_zero(&constraint) .solve(&NoRangeConstraints) .unwrap(); assert!(result.complete); let (var, rc) = unpack_range_constraint(&result); assert_eq!(var.to_string(), "a"); assert_eq!(rc, RangeConstraint::from_mask(1u64)); } } #[test] fn detect_complete_range_constraint() { let a = Qse::from_unknown_variable("a"); let three = constant(3); let four = constant(4); // `a` can be 3 or 4, which can be completely represented by // RangeConstraint::from_range(3, 4), so the identity should be // marked as complete. let constraint = (a.clone() - three) * (a - four); let result = AlgebraicConstraint::assert_zero(&constraint) .solve(&NoRangeConstraints) .unwrap(); assert!(result.complete); let (var, rc) = unpack_range_constraint(&result); assert_eq!(var.to_string(), "a"); assert_eq!( rc, RangeConstraint::from_range(GoldilocksField::from(3), GoldilocksField::from(4)) ); } #[test] fn detect_incomplete_range_constraint() { let a = Qse::from_unknown_variable("a"); let three = constant(3); let five = constant(5); // `a` can be 3 or 5, so there is a range constraint // RangeConstraint::from_range(3, 5) on `a`. // However, the identity is not complete, because the // range constraint allows for a value of 4, so removing // the identity would lose information. 
let constraint = (a.clone() - three) * (a - five); let result = AlgebraicConstraint::assert_zero(&constraint) .solve(&NoRangeConstraints) .unwrap(); assert!(!result.complete); let (var, rc) = unpack_range_constraint(&result); assert_eq!(var.to_string(), "a"); assert_eq!( rc, RangeConstraint::from_range(GoldilocksField::from(3), GoldilocksField::from(5)) ); } #[test] fn bool_plus_one_cant_be_zero() { let expr = var("a") + constant(1); let rc = RangeConstraint::from_mask(0x1u64); let range_constraints = HashMap::from([("a", rc)]); assert!(AlgebraicConstraint::assert_zero(&expr) .solve(&range_constraints) .is_err()); } #[test] fn solve_for() { let expr = var("w") + var("x") + constant(3) * var("y") + constant(5); let constr = AlgebraicConstraint::assert_zero(&expr); assert_eq!(expr.to_string(), "w + x + 3 * y + 5"); assert_eq!( constr.try_solve_for(&"x").unwrap().to_string(), "-(w + 3 * y + 5)" ); assert_eq!( constr.try_solve_for(&"y").unwrap().to_string(), "6148914689804861440 * w + 6148914689804861440 * x - 6148914689804861442" ); assert!(constr.try_solve_for(&"t").is_none()); } #[test] fn solve_for_expr() { let expr = var("w") + var("x") + constant(3) * var("y") + constant(5); let constr = AlgebraicConstraint::assert_zero(&expr); assert_eq!(expr.to_string(), "w + x + 3 * y + 5"); assert_eq!( constr.try_solve_for_expr(&var("x")).unwrap().to_string(), "-(w + 3 * y + 5)" ); assert_eq!( constr.try_solve_for_expr(&var("y")).unwrap().to_string(), "6148914689804861440 * w + 6148914689804861440 * x - 6148914689804861442" ); assert_eq!( constr .try_solve_for_expr(&-(constant(3) * var("y"))) .unwrap() .to_string(), "w + x + 5" ); assert_eq!( constr .try_solve_for_expr(&-(constant(3) * var("y") + constant(2))) .unwrap() .to_string(), "w + x + 3" ); assert_eq!( constr .try_solve_for_expr(&(var("x") + constant(3) * var("y") + constant(2))) .unwrap() .to_string(), "-(w + 3)" ); // We cannot solve these because the constraint does not contain a linear multiple // of the 
expression. assert!(constr .try_solve_for_expr(&(var("x") + constant(2) * var("y"))) .is_none()); assert!(constr.try_solve_for_expr(&(var("x") + var("y"))).is_none()); assert!(constr .try_solve_for_expr(&(constant(2) * var("x") + var("y"))) .is_none()); } #[test] fn solve_for_expr_normalization() { // Test normalization let t = GoldilocksField::from(3); let r = GoldilocksField::from(7); let expr = var("x") * r + var("y") * t; let constr = AlgebraicConstraint::assert_zero(&expr); assert_eq!(constr.to_string(), "7 * x + 3 * y = 0"); assert_eq!( constr .try_solve_for_expr(&(var("x") * r)) .unwrap() .to_string(), "-(3 * y)" ); } } ================================================ FILE: constraint-solver/src/bus_interaction_handler.rs ================================================ use itertools::Itertools; use powdr_number::FieldElement; use crate::{constraint_system::BusInteraction, range_constraint::RangeConstraint}; /// The sent / received data could not be received / sent. #[derive(Debug)] pub struct ViolatesBusRules {} /// A trait for handling bus interactions. pub trait BusInteractionHandler { /// Handles a bus interaction, by taking a bus interaction /// (with the fields represented by range constraints) and returning /// updated range constraints. /// The idea is that a certain combination of range constraints on elements /// can be further restricted given internal knowledge about the specific /// bus interaction, in particular if some elements are restricted to just /// a few or even concrete values. /// The range constraints are intersected with the previous ones by the /// caller, so there is no need to do that in the implementation of this /// trait. fn handle_bus_interaction( &self, bus_interaction: BusInteraction>, ) -> BusInteraction>; /// Like handle_bus_interaction, but returns an error if the current bus /// interaction violates the rules of the bus (e.g. [1234] in [BYTES]). 
fn handle_bus_interaction_checked( &self, bus_interaction: BusInteraction>, ) -> Result>, ViolatesBusRules> { let previous_constraints = bus_interaction.clone(); let new_constraints = self.handle_bus_interaction(bus_interaction); // Intersect the old and new range constraints. If they don't overlap, // there is a contradiction. for (previous_rc, new_rc) in previous_constraints .fields() .zip_eq(new_constraints.fields()) { if previous_rc.is_disjoint(new_rc) { return Err(ViolatesBusRules {}); } } Ok(new_constraints) } } /// A default bus interaction handler that does nothing. Using it is /// equivalent to ignoring bus interactions. #[derive(Default, Clone)] pub struct DefaultBusInteractionHandler { _marker: std::marker::PhantomData, } impl BusInteractionHandler for DefaultBusInteractionHandler { fn handle_bus_interaction( &self, bus_interaction: BusInteraction>, ) -> BusInteraction> { bus_interaction } } ================================================ FILE: constraint-solver/src/constraint_system.rs ================================================ use crate::{ bus_interaction_handler::ViolatesBusRules, effect::Effect, grouped_expression::{GroupedExpression, RangeConstraintProvider}, range_constraint::RangeConstraint, runtime_constant::{RuntimeConstant, Substitutable}, }; use derivative::Derivative; use itertools::Itertools; use powdr_number::FieldElement; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::{fmt::Display, hash::Hash}; pub use crate::algebraic_constraint::AlgebraicConstraint; pub use crate::bus_interaction_handler::BusInteractionHandler; /// Description of a constraint system. #[derive(Derivative, Serialize)] #[derivative(Default(bound = ""), Clone)] #[serde(bound(serialize = "V: Clone + Ord + Eq + Serialize, T: RuntimeConstant + Serialize"))] pub struct ConstraintSystem { /// The algebraic expressions which have to evaluate to zero. 
#[serde(rename = "constraints")] pub algebraic_constraints: Vec>>, /// Bus interactions, which can further restrict variables. /// Exact semantics are up to the implementation of BusInteractionHandler pub bus_interactions: Vec>>, /// Newly added variables whose values are derived from existing variables. #[serde(rename = "derived_columns")] pub derived_variables: Vec>>, } impl Display for ConstraintSystem { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "{}", self.algebraic_constraints .iter() .map(|constraint| format!("{constraint}")) .chain( self.bus_interactions .iter() .map(|bus_inter| format!("{bus_inter}")) ) .chain(self.derived_variables.iter().map( |DerivedVariable { variable, computation_method, }| { format!("{variable} := {computation_method}") } )) .format("\n") ) } } impl ConstraintSystem { /// Returns all referenced unknown variables in the system. Might contain repetitions. /// /// Variables referenced in derived variables are not included, as they are not part of the constraints. pub fn referenced_unknown_variables(&self) -> impl Iterator { self.algebraic_constraints .iter() .flat_map(|c| c.referenced_unknown_variables()) .chain( self.bus_interactions .iter() .flat_map(|b| b.referenced_unknown_variables()), ) } /// Extends the constraint system by the constraints of another system. /// No de-duplication of constraints or disambiguation of variables is performed. 
pub fn extend(&mut self, system: ConstraintSystem) { self.algebraic_constraints .extend(system.algebraic_constraints); self.bus_interactions.extend(system.bus_interactions); self.derived_variables.extend(system.derived_variables); } } #[derive(Clone, Debug)] pub struct DerivedVariable { pub variable: V, pub computation_method: ComputationMethod, } impl DerivedVariable { pub fn new(variable: V, computation_method: ComputationMethod) -> Self { Self { variable, computation_method, } } } impl Serialize for DerivedVariable where V: Serialize, ComputationMethod: Serialize, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { (&self.variable, &self.computation_method).serialize(serializer) } } impl<'de, T, V, E> Deserialize<'de> for DerivedVariable where V: Deserialize<'de>, ComputationMethod: Deserialize<'de>, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let (variable, computation_method) = <(V, ComputationMethod)>::deserialize(deserializer)?; Ok(Self { variable, computation_method, }) } } /// Specifies a way to compute the value of a variable from other variables. /// It is generic over the field `T` and the expression type `E`. #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ComputationMethod { /// A constant value. Constant(T), /// The quotiont (using inversion in the field) of the first argument /// by the second argument, or zero if the latter is zero. QuotientOrZero(E, E), } impl Display for ComputationMethod { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ComputationMethod::Constant(c) => write!(f, "{c}"), ComputationMethod::QuotientOrZero(e1, e2) => write!(f, "QuotientOrZero({e1}, {e2})"), } } } impl ComputationMethod> { /// Returns the set of referenced unknown variables in the computation method. Might contain repetitions. 
pub fn referenced_unknown_variables(&self) -> Box + '_> { match self { ComputationMethod::Constant(_) => Box::new(std::iter::empty()), ComputationMethod::QuotientOrZero(e1, e2) => Box::new( e1.referenced_unknown_variables() .chain(e2.referenced_unknown_variables()), ), } } } impl, V: Ord + Clone + Eq> ComputationMethod> { /// Substitute a variable by a symbolically known expression. The variable can be known or unknown. /// If it was already known, it will be substituted in the known expressions. pub fn substitute_by_known(&mut self, variable: &V, substitution: &T) { match self { ComputationMethod::Constant(_) => {} ComputationMethod::QuotientOrZero(e1, e2) => { e1.substitute_by_known(variable, substitution); e2.substitute_by_known(variable, substitution); } } } /// Substitute an unknown variable by a GroupedExpression. /// /// Note this does NOT work properly if the variable is used inside a /// known SymbolicExpression. pub fn substitute_by_unknown(&mut self, variable: &V, substitution: &GroupedExpression) { match self { ComputationMethod::Constant(_) => {} ComputationMethod::QuotientOrZero(e1, e2) => { e1.substitute_by_unknown(variable, substitution); e2.substitute_by_unknown(variable, substitution); } } } } /// A bus interaction. #[derive(Clone, Debug, Hash, Eq, PartialEq, Serialize)] pub struct BusInteraction { /// The ID of the bus. #[serde(rename = "id")] pub bus_id: V, /// The multiplicity of the bus interaction. In most cases, /// this should evaluate to 1 or -1. #[serde(rename = "mult")] pub multiplicity: V, /// The payload of the bus interaction. 
#[serde(rename = "args")] pub payload: Vec, } impl BusInteraction { pub fn fields(&self) -> impl Iterator { Box::new( [&self.bus_id, &self.multiplicity] .into_iter() .chain(self.payload.iter()), ) } pub fn fields_mut(&mut self) -> impl Iterator { Box::new( [&mut self.bus_id, &mut self.multiplicity] .into_iter() .chain(self.payload.iter_mut()), ) } } impl Display for BusInteraction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "BusInteraction {{ bus_id: {}, multiplicity: {}, payload: {} }}", self.bus_id, self.multiplicity, self.payload.iter().format(", ") ) } } impl FromIterator for BusInteraction { fn from_iter>(iter: T) -> Self { let mut iter = iter.into_iter(); let bus_id = iter.next().unwrap(); let multiplicity = iter.next().unwrap(); let payload = iter.collect(); BusInteraction { bus_id, payload, multiplicity, } } } impl BusInteraction> { /// Converts a bus interactions with fields represented by expressions /// to a bus interaction with fields represented by range constraints. pub fn to_range_constraints( &self, range_constraints: &impl RangeConstraintProvider, ) -> BusInteraction> { BusInteraction::from_iter( self.fields() .map(|expr| expr.range_constraint(range_constraints)), ) } } impl BusInteraction> { /// Refines range constraints of the bus interaction's fields /// using the provided `BusInteractionHandler`. /// Returns a list of updates to be executed by the caller. /// Forwards and error by the bus interaction handler. 
pub fn solve( &self, bus_interaction_handler: &dyn BusInteractionHandler, range_constraint_provider: &impl RangeConstraintProvider, ) -> Result>, ViolatesBusRules> { let range_constraints = self.to_range_constraints(range_constraint_provider); let range_constraints = bus_interaction_handler.handle_bus_interaction_checked(range_constraints)?; Ok(self .fields() .zip_eq(range_constraints.fields()) .filter(|(expr, _)| expr.is_affine()) .flat_map(|(expr, rc)| { expr.referenced_unknown_variables().filter_map(move |var| { // `k * var + e` is in range rc <=> // `var` is in range `(rc - RC[e]) / k` = `rc / k + RC[-e / k]` // If we solve `expr` for `var`, we get `-e / k`. let k = expr .coefficient_of_variable_in_affine_part(var) .unwrap() .try_to_number()?; let expr = AlgebraicConstraint::assert_zero(expr).try_solve_for(var)?; let rc = rc .multiple(T::from(1) / k) .combine_sum(&expr.range_constraint(range_constraint_provider)); (!rc.is_unconstrained()).then(|| Effect::RangeConstraint(var.clone(), rc)) }) }) .collect()) } } impl BusInteraction> { /// Returns the set of referenced unknown variables. Might contain repetitions. 
// NOTE(review): generics reconstructed — stripped by extraction; verify upstream.
    pub fn referenced_unknown_variables(&self) -> Box<dyn Iterator<Item = &V> + '_> {
        // All fields of a bus interaction (bus id, multiplicity and payload)
        // can reference unknown variables; duplicates are not removed.
        Box::new(
            self.fields()
                .flat_map(|expr| expr.referenced_unknown_variables()),
        )
    }
}

/// A reference to a single constraint of a constraint system: either an
/// algebraic constraint or a bus interaction.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub enum ConstraintRef<'a, T, V> {
    AlgebraicConstraint(AlgebraicConstraint<&'a GroupedExpression<T, V>>),
    BusInteraction(&'a BusInteraction<GroupedExpression<T, V>>),
}

impl<'a, T, V> ConstraintRef<'a, T, V> {
    /// Returns the unknown variables referenced by the underlying constraint.
    /// Might contain repetitions.
    pub fn referenced_unknown_variables(&self) -> Box<dyn Iterator<Item = &V> + '_> {
        match self {
            ConstraintRef::AlgebraicConstraint(expr) => expr.referenced_unknown_variables(),
            ConstraintRef::BusInteraction(bus_interaction) => {
                bus_interaction.referenced_unknown_variables()
            }
        }
    }
}

================================================
FILE: constraint-solver/src/effect.rs
================================================
use crate::{range_constraint::RangeConstraint, runtime_constant::RuntimeConstant};

/// The effect of solving a symbolic equation.
#[derive(Clone, PartialEq, Eq)]
pub enum Effect<T: RuntimeConstant, V> {
    /// Variable can be assigned a value.
    Assignment(V, T),
    /// We learnt a new range constraint on variable.
    RangeConstraint(V, RangeConstraint<T::FieldType>),
    /// A run-time assertion. If this fails, we have conflicting constraints.
    Assertion(Assertion<T>),
    /// A variable is assigned one of two alternative expressions, depending on a condition.
    ConditionalAssignment {
        variable: V,
        condition: Condition<T>,
        in_range_value: T,
        out_of_range_value: T,
    },
}

/// A run-time assertion. If this fails, we have conflicting constraints.
#[derive(Clone, PartialEq, Eq)]
pub struct Assertion<T> {
    pub lhs: T,
    pub rhs: T,
    /// If this is true, we assert that both sides are equal.
    /// Otherwise, we assert that they are different.
// NOTE(review): generics reconstructed — stripped by extraction; verify upstream.
    pub expected_equal: bool,
}

impl<T: RuntimeConstant> Assertion<T> {
    /// Convenience constructor: assert that `condition` is zero at run time.
    pub fn assert_is_zero<V>(condition: T) -> Effect<T, V> {
        Self::assert_eq(condition, T::from_u64(0))
    }

    /// Convenience constructor: assert that `condition` is nonzero at run time.
    pub fn assert_is_nonzero<V>(condition: T) -> Effect<T, V> {
        Self::assert_neq(condition, T::from_u64(0))
    }

    /// Asserts that both sides evaluate to the same value.
    pub fn assert_eq<V>(lhs: T, rhs: T) -> Effect<T, V> {
        Effect::Assertion(Assertion {
            lhs,
            rhs,
            expected_equal: true,
        })
    }

    /// Asserts that the two sides evaluate to different values.
    pub fn assert_neq<V>(lhs: T, rhs: T) -> Effect<T, V> {
        Effect::Assertion(Assertion {
            lhs,
            rhs,
            expected_equal: false,
        })
    }
}

/// The condition of a conditional assignment: holds if `value` satisfies the
/// range constraint `condition`.
#[derive(Clone, PartialEq, Eq)]
pub struct Condition<T: RuntimeConstant> {
    pub value: T,
    pub condition: RangeConstraint<T::FieldType>,
}

================================================
FILE: constraint-solver/src/grouped_expression.rs
================================================
use std::{
    collections::{BTreeMap, HashMap, HashSet},
    fmt::Display,
    hash::Hash,
    iter::{once, Sum},
    ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub},
};

use crate::runtime_constant::{RuntimeConstant, Substitutable, VarTransformable};
use itertools::Itertools;
use num_traits::One;
use num_traits::Zero;
use powdr_number::FieldElement;
use serde::{Serialize, Serializer};

use super::range_constraint::RangeConstraint;
use super::symbolic_expression::SymbolicExpression;

/// Terms with more than `MAX_SUM_SIZE_FOR_QUADRATIC_ANALYSIS` quadratic terms
/// are not analyzed for pairs that sum to zero.
const MAX_SUM_SIZE_FOR_QUADRATIC_ANALYSIS: usize = 20;

/// A symbolic expression in unknown variables of type `V` and (symbolically)
/// known terms, representing a sum of (super-)quadratic, linear and constant parts.
/// The quadratic terms are of the form `X * Y`, where `X` and `Y` are
/// `GroupedExpression`s that have at least one unknown.
/// The linear terms are of the form `a * X`, where `a` is a (symbolically) known
/// value and `X` is an unknown variable.
/// The constant term is a (symbolically) known value.
/// /// It also provides ways to quickly update the expression when the value of /// an unknown variable gets known and provides functions to solve /// (some kinds of) equations. /// /// The name is derived from the fact that it groups linear terms by variable. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct GroupedExpression { /// Quadratic terms of the form `a * X * Y`, where `a` is a (symbolically) /// known value and `X` and `Y` are grouped expressions that /// have at least one unknown. quadratic: Vec<(Self, Self)>, /// Linear terms of the form `a * X`, where `a` is a (symbolically) known /// value and `X` is an unknown variable. linear: BTreeMap, /// Constant term, a (symbolically) known value. constant: T, } /// A component of a grouped expression. pub enum GroupedExpressionComponent { /// A quadratic component `(c1, c1)`, representing `c1 * c2`. Quadratic(GroupedExpression, GroupedExpression), /// A linear component `(v, c)`, representing `c * v`. Linear(V, T), /// A constant component `c`. 
Constant(T), } impl From> for GroupedExpression where F: FieldElement, T: RuntimeConstant, V: Clone + Ord + Eq, { fn from(s: GroupedExpressionComponent) -> Self { match s { GroupedExpressionComponent::Quadratic(l, r) => Self { quadratic: vec![(l, r)], linear: Default::default(), constant: T::zero(), }, GroupedExpressionComponent::Linear(v, c) => Self { quadratic: Default::default(), linear: [(v, c)].into_iter().collect(), constant: T::zero(), }, GroupedExpressionComponent::Constant(c) => Self { quadratic: Default::default(), linear: Default::default(), constant: c, }, } } } impl, V> GroupedExpression { pub fn from_number(k: F) -> Self { Self { quadratic: Default::default(), linear: Default::default(), constant: T::from(k), } } } impl Zero for GroupedExpression { fn zero() -> Self { Self { quadratic: Default::default(), linear: Default::default(), constant: T::zero(), } } fn is_zero(&self) -> bool { self.try_to_known().is_some_and(|k| k.is_known_zero()) } } impl One for GroupedExpression { fn one() -> Self { Self { quadratic: Default::default(), linear: Default::default(), constant: T::one(), } } fn is_one(&self) -> bool { self.try_to_known().is_some_and(|k| k.is_known_one()) } } impl GroupedExpression, V> { pub fn from_known_symbol(symbol: V, rc: RangeConstraint) -> Self { Self::from_runtime_constant(SymbolicExpression::from_symbol(symbol, rc)) } } impl GroupedExpression { pub fn from_runtime_constant(constant: T) -> Self { Self { quadratic: Default::default(), linear: Default::default(), constant, } } pub fn from_unknown_variable(var: V) -> Self { Self { quadratic: Default::default(), linear: [(var.clone(), T::one())].into_iter().collect(), constant: T::zero(), } } /// If this expression does not contain unknown variables, returns the symbolic expression. pub fn try_to_known(&self) -> Option<&T> { if self.quadratic.is_empty() && self.linear.is_empty() { Some(&self.constant) } else { None } } /// Returns true if this expression does not contain any quadratic terms. 
pub fn is_affine(&self) -> bool { !self.is_quadratic() } /// If the expression is a known number, returns it. pub fn try_to_number(&self) -> Option { self.try_to_known()?.try_to_number() } /// If the expression is equal to `GroupedExpression::from_unknown_variable(v)`, returns `v`. pub fn try_to_simple_unknown(&self) -> Option { if self.is_quadratic() || !self.constant.is_known_zero() { return None; } let Ok((var, coeff)) = self.linear.iter().exactly_one() else { return None; }; if !coeff.is_known_one() { return None; } Some(var.clone()) } /// Returns true if this expression contains at least one quadratic term. pub fn is_quadratic(&self) -> bool { !self.quadratic.is_empty() } /// Returns `(l, r)` if `self == l * r`. pub fn try_as_single_product(&self) -> Option<(&Self, &Self)> { if self.linear.is_empty() && self.constant.is_known_zero() { match self.quadratic.as_slice() { [(l, r)] => Some((l, r)), _ => None, } } else { None } } /// Returns `vec![f1, f2, ..., fn]` such that `self` is equivalent to /// `c * f1 * f2 * ... * fn` for some constant `c`. /// Tries to find as many factors as possible and also tries to normalize /// the factors as much as possible. pub fn to_factors(&self) -> Vec { let summands = self.quadratic.len() + self.linear.len() + if self.constant.is_known_zero() { 0 } else { 1 }; if summands == 0 { vec![Self::zero()] } else if summands == 1 { if let [(l, r)] = self.quadratic.as_slice() { l.to_factors().into_iter().chain(r.to_factors()).collect() } else if let Some((var, _)) = self.linear.iter().next() { vec![Self::from_unknown_variable(var.clone())] } else { vec![] } } else { // Try to normalize let divide_by = if !self.constant.is_known_zero() { // If the constant is not zero, we divide by the constant. if self.constant.is_known_nonzero() { self.constant.clone() } else { T::one() } } else if !self.linear.is_empty() { // Otherwise, we divide by the coefficient of the smallest variable. 
self.linear.iter().next().unwrap().1.clone() } else { // This is a sum of quadratic expressions, we cannot really normalize this part. T::one() }; vec![self.clone() * T::one().field_div(÷_by)] } } /// Splits this expression into head and tail, i.e., `self = head + tail` /// head is the first summand, i.e., either the first quadratic term or the first linear term. pub fn try_split_head_tail(mut self) -> Option<(Self, Self)> { if !self.quadratic.is_empty() { let mut quadratic = self.quadratic.into_iter(); let (hl, hr) = quadratic.next().unwrap(); self.quadratic = quadratic.collect(); Some(((hl * hr), self)) } else if !self.linear.is_empty() { let (hv, hc) = self.linear.pop_first()?; Some((GroupedExpressionComponent::Linear(hv, hc).into(), self)) } else { None } } /// Returns the linear components of this expression, i.e. summands that we were /// able to determine to be only a runtime constant times a single variable. /// If `is_affine()` returns true, this returns all summands except the constant offset. /// Otherwise, the variables returned here might also appear inside the higher order terms /// and this the dependency on these variables might be more complicated than just a /// runtime constant factor. pub fn linear_components( &self, ) -> impl DoubleEndedIterator + ExactSizeIterator + Clone { self.linear.iter() } /// Returns the constant offset in this expression. pub fn constant_offset(&self) -> &T { &self.constant } /// Returns a slice of the quadratic components of this expression. pub fn quadratic_components(&self) -> &[(Self, Self)] { &self.quadratic } /// Turns this expression into an iterator over its summands. 
pub fn into_summands(self) -> impl Iterator> { self.quadratic .into_iter() .map(|(l, r)| GroupedExpressionComponent::Quadratic(l, r)) .chain( self.linear .into_iter() .map(|(v, c)| GroupedExpressionComponent::Linear(v, c)), ) .chain( (!self.constant.is_zero()) .then_some(GroupedExpressionComponent::Constant(self.constant)), ) } /// Computes the degree of a GroupedExpression in the unknown variables. /// Note that it might overestimate the degree if the expression contains /// terms that cancel each other out, e.g. `a * (b + 1) - a * b - a`. /// Variables inside runtime constants are ignored. pub fn degree(&self) -> usize { self.quadratic .iter() .map(|(l, r)| l.degree() + r.degree()) .chain((!self.linear.is_empty()).then_some(1)) .max() .unwrap_or(0) } /// Computes the degree of a variable in this expression. /// Variables inside runtime constants are ignored. pub fn degree_of_variable(&self, var: &V) -> usize { let linear_degree = if self.linear.contains_key(var) { 1 } else { 0 }; self.quadratic .iter() .map(|(l, r)| l.degree_of_variable(var) + r.degree_of_variable(var)) .chain(once(linear_degree)) .max() .unwrap() } /// Returns the coefficient of the variable `variable` in the affine part of this /// expression. /// If the expression is affine, this is the actual coefficient of the variable /// in the expression. Otherwise, the quadratic part of the expression could /// also contain the variable and thus the actual coefficient might be different /// (even zero). pub fn coefficient_of_variable_in_affine_part<'a>(&'a self, var: &V) -> Option<&'a T> { self.linear.get(var) } /// If `self` contains `var` exactly once in an affine way, /// returns `Some((coeff, rest))` where `self = coeff * var + rest`. /// /// This is relatively expensive because it needs to construct a new /// GroupedExpression. 
pub fn try_extract_affine_var(&self, var: V) -> Option<(T, Self)> { if self .referenced_unknown_variables() .filter(|v| *v == &var) .count() != 1 { return None; } let coeff = self.linear.get(&var)?.clone(); let mut rest = self.clone(); rest.linear.remove(&var); Some((coeff, rest)) } /// Returns the range constraint of the full expression. pub fn range_constraint( &self, range_constraints: &impl RangeConstraintProvider, ) -> RangeConstraint { self.quadratic .iter() .map(|(l, r)| { if l == r { l.range_constraint(range_constraints).square() } else { l.range_constraint(range_constraints) .combine_product(&r.range_constraint(range_constraints)) } }) .chain(self.linear.iter().map(|(var, coeff)| { range_constraints .get(var) .combine_product(&coeff.range_constraint()) })) .chain(std::iter::once(self.constant.range_constraint())) .reduce(|rc1, rc2| rc1.combine_sum(&rc2)) .unwrap_or_else(|| RangeConstraint::from_value(0.into())) } } impl GroupedExpression { pub fn substitute_simple(&mut self, variable: &V, substitution: T) { if self.linear.contains_key(variable) { let coeff = self.linear.remove(variable).unwrap(); self.constant += coeff * substitution; } let mut to_add = GroupedExpression::zero(); self.quadratic.retain_mut(|(l, r)| { l.substitute_simple(variable, substitution); r.substitute_simple(variable, substitution); match (l.try_to_known(), r.try_to_known()) { (Some(l), Some(r)) => { self.constant += *l * *r; false } (Some(l), None) => { if !l.is_zero() { to_add += r.clone() * l; } false } (None, Some(r)) => { if !r.is_zero() { to_add += l.clone() * r; } false } _ => true, } }); // remove_quadratic_terms_adding_to_zero(&mut self.quadratic); if !to_add.is_zero() { *self += to_add; } } } impl, V: Ord + Clone + Eq> GroupedExpression { /// Substitute a variable by a symbolically known expression. The variable can be known or unknown. /// If it was already known, it will be substituted in the known expressions. 
pub fn substitute_by_known(&mut self, variable: &V, substitution: &T) { self.constant.substitute(variable, substitution); if self.linear.contains_key(variable) { // If the variable is a key in `linear`, it must be unknown // and thus can only occur there. Otherwise, it can be in // any symbolic expression. // We replace the variable by a symbolic expression, so it goes into the constant part. let coeff = self.linear.remove(variable).unwrap(); self.constant += coeff * substitution.clone(); } else { for coeff in self.linear.values_mut() { coeff.substitute(variable, substitution); } self.linear.retain(|_, f| !f.is_known_zero()); } // TODO can we do that without moving everything? // In the end, the order does not matter much. let mut to_add = GroupedExpression::zero(); self.quadratic.retain_mut(|(l, r)| { l.substitute_by_known(variable, substitution); r.substitute_by_known(variable, substitution); match (l.try_to_known(), r.try_to_known()) { (Some(l), Some(r)) => { to_add += GroupedExpression::from_runtime_constant(l.clone() * r.clone()); false } (Some(l), None) => { to_add += r.clone() * l; false } (None, Some(r)) => { to_add += l.clone() * r; false } _ => true, } }); remove_quadratic_terms_adding_to_zero(&mut self.quadratic); if to_add.try_to_known().map(|ta| ta.is_known_zero()) != Some(true) { *self += to_add; } } /// Substitute an unknown variable by a GroupedExpression. /// /// Note this does NOT work properly if the variable is used inside a /// known SymbolicExpression. 
pub fn substitute_by_unknown(&mut self, variable: &V, substitution: &GroupedExpression) { if !self.referenced_unknown_variables().any(|v| v == variable) { return; } let mut to_add = GroupedExpression::zero(); for (var, coeff) in std::mem::take(&mut self.linear) { if var == *variable { to_add += substitution.clone() * coeff; } else { self.linear.insert(var, coeff); } } self.quadratic = std::mem::take(&mut self.quadratic) .into_iter() .filter_map(|(mut l, mut r)| { l.substitute_by_unknown(variable, substitution); r.substitute_by_unknown(variable, substitution); match (l.try_to_known(), r.try_to_known()) { (Some(lval), Some(rval)) => { to_add += Self::from_runtime_constant(lval.clone() * rval.clone()); None } (Some(lval), None) => { to_add += r * lval; None } (None, Some(rval)) => { to_add += l * rval; None } _ => Some((l, r)), } }) .collect(); remove_quadratic_terms_adding_to_zero(&mut self.quadratic); *self += to_add; } } impl GroupedExpression { /// Returns the referenced unknown variables. Might contain repetitions. 
pub fn referenced_unknown_variables(&self) -> Box + '_> { let quadratic = self.quadratic.iter().flat_map(|(a, b)| { a.referenced_unknown_variables() .chain(b.referenced_unknown_variables()) }); Box::new(quadratic.chain(self.linear.keys())) } } impl, V1: Ord + Clone, V2: Ord + Clone> VarTransformable for GroupedExpression { type Transformed = GroupedExpression; fn try_transform_var_type( &self, var_transform: &mut impl FnMut(&V1) -> Option, ) -> Option { Some(GroupedExpression { quadratic: self .quadratic .iter() .map(|(l, r)| { Some(( l.try_transform_var_type(var_transform)?, r.try_transform_var_type(var_transform)?, )) }) .collect::>>()?, linear: self .linear .iter() .map(|(var, coeff)| { let new_var = var_transform(var)?; Some((new_var, coeff.try_transform_var_type(var_transform)?)) }) .collect::>>()?, constant: self.constant.try_transform_var_type(var_transform)?, }) } } pub trait RangeConstraintProvider { fn get(&self, var: &V) -> RangeConstraint; } impl, T: FieldElement, V> RangeConstraintProvider for &R { fn get(&self, var: &V) -> RangeConstraint { R::get(self, var) } } impl RangeConstraintProvider for HashMap> { fn get(&self, var: &V) -> RangeConstraint { HashMap::get(self, var).cloned().unwrap_or_default() } } #[derive(Clone, Copy)] pub struct NoRangeConstraints; impl RangeConstraintProvider for NoRangeConstraints { fn get(&self, _var: &V) -> RangeConstraint { RangeConstraint::default() } } impl Add for GroupedExpression { type Output = GroupedExpression; fn add(mut self, rhs: Self) -> Self { self += rhs; self } } impl Add for &GroupedExpression { type Output = GroupedExpression; fn add(self, rhs: Self) -> Self::Output { self.clone() + rhs.clone() } } impl AddAssign> for GroupedExpression { fn add_assign(&mut self, rhs: Self) { self.quadratic = combine_removing_zeros(std::mem::take(&mut self.quadratic), rhs.quadratic); for (var, coeff) in rhs.linear { self.linear .entry(var.clone()) .and_modify(|f| *f += coeff.clone()) .or_insert_with(|| coeff); } 
// NOTE(review): generics reconstructed — stripped by extraction; verify upstream.
        // Fold in the constant part, then drop any linear term whose
        // coefficient became (known) zero through cancellation.
        self.constant += rhs.constant.clone();
        self.linear.retain(|_, f| !f.is_known_zero());
    }
}

/// Returns the sum of these quadratic terms while removing terms that
/// cancel each other out.
fn combine_removing_zeros<E: PartialEq>(first: Vec<(E, E)>, mut second: Vec<(E, E)>) -> Vec<(E, E)>
where
    for<'a> &'a E: Neg<Output = E>,
{
    if first.len() + second.len() > MAX_SUM_SIZE_FOR_QUADRATIC_ANALYSIS {
        // If there are too many terms, we cannot do this efficiently
        // (the matching below is quadratic in the number of terms).
        return first.into_iter().chain(second).collect();
    }
    let mut result = first
        .into_iter()
        .filter(|first| {
            // Try to find a term in `second` that cancels `first`.
            if let Some((j, _)) = second
                .iter()
                .find_position(|second| quadratic_terms_add_to_zero(first, second))
            {
                // We found a match, so they cancel each other out; remove both.
                second.remove(j);
                false
            } else {
                true
            }
        })
        .collect_vec();
    // Whatever survived of `second` is appended unchanged.
    result.extend(second);
    result
}

/// Removes pairs of items from `terms` whose products add to zero.
fn remove_quadratic_terms_adding_to_zero<E: PartialEq>(terms: &mut Vec<(E, E)>)
where
    for<'a> &'a E: Neg<Output = E>,
{
    if terms.len() > MAX_SUM_SIZE_FOR_QUADRATIC_ANALYSIS {
        // If there are too many terms, we cannot do this efficiently.
        return;
    }
    // Collect indices of cancelling pairs first, then rebuild the vector once.
    let mut to_remove = HashSet::new();
    for ((i, first), (j, second)) in terms.iter().enumerate().tuple_combinations() {
        if to_remove.contains(&i) || to_remove.contains(&j) {
            // One of the two terms is already paired up with another term.
            continue;
        }
        if quadratic_terms_add_to_zero(first, second) {
            // The pair cancels out; mark both for removal.
            to_remove.insert(i);
            to_remove.insert(j);
        }
    }
    if !to_remove.is_empty() {
        *terms = terms
            .drain(..)
            .enumerate()
            .filter(|(i, _)| !to_remove.contains(i))
            .map(|(_, term)| term)
            .collect();
    }
}

/// Returns true if `first.0 * first.1 = -second.0 * second.1`,
/// but does not catch all cases.
fn quadratic_terms_add_to_zero(first: &(E, E), second: &(E, E)) -> bool where for<'a> &'a E: Neg, { let (s0, s1) = second; // Check if `first.0 * first.1 == -(second.0 * second.1)`, but we can swap left and right // and we can put the negation either left or right. let n1 = (&-s0, s1); let n2 = (s0, &-s1); [n1, n2].contains(&(&first.0, &first.1)) || [n1, n2].contains(&(&first.1, &first.0)) } impl Sub for &GroupedExpression { type Output = GroupedExpression; fn sub(self, rhs: Self) -> Self::Output { self + &-rhs } } impl Sub for GroupedExpression { type Output = GroupedExpression; fn sub(self, rhs: Self) -> Self::Output { &self - &rhs } } impl GroupedExpression { fn negate(&mut self) { for (first, _) in &mut self.quadratic { first.negate() } for coeff in self.linear.values_mut() { *coeff = -coeff.clone(); } self.constant = -self.constant.clone(); } } impl Neg for GroupedExpression { type Output = GroupedExpression; fn neg(mut self) -> Self { self.negate(); self } } impl Neg for &GroupedExpression { type Output = GroupedExpression; fn neg(self) -> Self::Output { -((*self).clone()) } } /// Multiply by known symbolic expression. 
impl Mul<&T> for GroupedExpression {
    type Output = GroupedExpression;
    fn mul(mut self, rhs: &T) -> Self {
        self *= rhs;
        self
    }
}

impl Mul for GroupedExpression {
    type Output = GroupedExpression;
    fn mul(self, rhs: T) -> Self {
        self * &rhs
    }
}

impl MulAssign<&T> for GroupedExpression {
    fn mul_assign(&mut self, rhs: &T) {
        if rhs.is_known_zero() {
            // Multiplying by a known zero collapses the whole expression.
            *self = Self::zero();
        } else {
            // Scale one factor of each quadratic term, every linear
            // coefficient, and the constant.
            for (first, _) in &mut self.quadratic {
                *first *= rhs;
            }
            for coeff in self.linear.values_mut() {
                *coeff *= rhs.clone();
            }
            self.constant *= rhs.clone();
        }
    }
}

impl Sum for GroupedExpression {
    fn sum>(iter: I) -> Self {
        iter.fold(Self::zero(), |mut acc, item| {
            acc += item;
            acc
        })
    }
}

impl Mul for GroupedExpression {
    type Output = GroupedExpression;
    fn mul(self, rhs: GroupedExpression) -> Self {
        // If either operand is a known value, distribute it over the other
        // operand; otherwise the product is stored as an opaque quadratic term.
        if let Some(k) = rhs.try_to_known() {
            self * k
        } else if let Some(k) = self.try_to_known() {
            rhs * k
        } else {
            Self {
                quadratic: vec![(self, rhs)],
                linear: Default::default(),
                constant: T::zero(),
            }
        }
    }
}

impl Display for GroupedExpression {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (sign, s) = self.to_signed_string();
        if sign {
            write!(f, "-({s})")
        } else {
            write!(f, "{s}")
        }
    }
}

impl GroupedExpression {
    /// Formats the expression as a sum of its components and returns
    /// `(negated, string)`: if `negated` is true, the expression is the
    /// negation of `string`. The sign of the first summand is factored out;
    /// each later summand is joined with `+` if its sign equals the first
    /// summand's and with `-` otherwise.
    fn to_signed_string(&self) -> (bool, String) {
        self.quadratic
            .iter()
            .map(|(a, b)| {
                let (a_sign, a) = a.to_signed_string();
                let (b_sign, b) = b.to_signed_string();
                // The sign of a product is the XOR of the factor signs.
                (a_sign ^ b_sign, format!("({a}) * ({b})"))
            })
            .chain(
                self.linear
                    .iter()
                    .map(|(var, coeff)| match coeff.try_to_number() {
                        // Coefficients of +/-1 are printed without the factor.
                        Some(k) if k == T::FieldType::one() => (false, format!("{var}")),
                        Some(k) if k == -T::FieldType::one() => (true, format!("{var}")),
                        _ => {
                            let (sign, coeff) = Self::symbolic_expression_to_signed_string(coeff);
                            (sign, format!("{coeff} * {var}"))
                        }
                    }),
            )
            .chain(match self.constant.try_to_number() {
                // A zero constant is omitted entirely.
                Some(k) if k == T::FieldType::zero() => None,
                _ => Some(Self::symbolic_expression_to_signed_string(&self.constant)),
            })
            .reduce(|(n1, p1), (n2, p2)| {
                (
                    n1,
                    if n1 == n2 {
                        format!("{p1} + {p2}")
                    } else {
                        format!("{p1} - {p2}")
                    },
                )
            })
            .unwrap_or((false, "0".to_string()))
    }

    /// Renders a known value with the sign factored out: field elements in
    /// the upper half of the field print as the negation of their additive
    /// inverse (e.g. `p - 1` prints as `1` with sign `true`).
    fn symbolic_expression_to_signed_string(value: &T) -> (bool, String) {
        match value.try_to_number() {
            Some(k) => {
                if k.is_in_lower_half() {
                    (false, format!("{k}"))
                } else {
                    (true, format!("{}", -k))
                }
            }
            _ => (false, value.to_string()),
        }
    }
}

impl Serialize for GroupedExpression {
    fn serialize(&self, serializer: S) -> Result {
        // Serialize as a left-nested sum of the summands; the empty sum
        // serializes as the zero constant.
        let summands = self.clone().into_summands().collect::>();
        if summands.is_empty() {
            T::zero().serialize(serializer)
        } else {
            SumSerializer::new(&summands).serialize(serializer)
        }
    }
}

/// Serializes `[1, 2, 3]` into `((1, "+", 2), "+", 3)`.
struct SumSerializer<'a, I> {
    items: &'a [I],
}

impl<'a, I> SumSerializer<'a, I> {
    /// Creates a new serializer. `items` must be non-empty.
    pub fn new(items: &'a [I]) -> Self {
        assert!(!items.is_empty());
        Self { items }
    }
}

impl<'a, I: Serialize> Serialize for SumSerializer<'a, I> {
    fn serialize(&self, serializer: S) -> Result {
        let (last, beginning) = self.items.split_last().unwrap();
        if beginning.is_empty() {
            last.serialize(serializer)
        } else {
            // Recursive left-nesting: the prefix is itself serialized as a sum.
            (&SumSerializer { items: beginning }, "+", last).serialize(serializer)
        }
    }
}

impl Serialize for GroupedExpressionComponent {
    fn serialize(&self, serializer: S) -> Result {
        match self {
            GroupedExpressionComponent::Quadratic(l, r) => (l, "*", r).serialize(serializer),
            GroupedExpressionComponent::Linear(v, c) => {
                if c.is_one() {
                    // Omit the redundant factor of one.
                    v.serialize(serializer)
                } else {
                    (c, "*", v).serialize(serializer)
                }
            }
            GroupedExpressionComponent::Constant(c) => c.serialize(serializer),
        }
    }
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::{
        symbolic_expression::SymbolicExpression,
        test_utils::{constant, var},
    };

    use super::*;
    use expect_test::expect;
    use powdr_number::GoldilocksField;
    use pretty_assertions::assert_eq;

    type Qse = GroupedExpression, &'static str>;

    #[test]
    fn test_mul() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_known_symbol("A", RangeConstraint::default());
        let t = x * y + a;
assert_eq!(t.to_string(), "(X) * (Y) + A");
    }

    #[test]
    fn test_add() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_unknown_variable("A");
        let b = Qse::from_known_symbol("B", RangeConstraint::default());
        let t: Qse = x * y - a + b;
        assert_eq!(t.to_string(), "(X) * (Y) - A + B");
        // Unknown linear coefficients fold (`2 * A`), known symbols do not (`B + B`).
        assert_eq!(
            (t.clone() + t).to_string(),
            "(X) * (Y) + (X) * (Y) - 2 * A + (B + B)"
        );
    }

    #[test]
    fn test_mul_by_known() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_known_symbol("A", RangeConstraint::default());
        let b = Qse::from_known_symbol("B", RangeConstraint::default());
        let t: Qse = (x * y + a) * b;
        assert_eq!(t.to_string(), "(B * X) * (Y) + (A * B)");
    }

    #[test]
    fn test_mul_by_zero() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_known_symbol("A", RangeConstraint::default());
        let zero = Qse::zero();
        let t: Qse = x * y + a;
        assert_eq!(t.to_string(), "(X) * (Y) + A");
        assert_eq!((t.clone() * zero).to_string(), "0");
    }

    #[test]
    fn test_apply_update() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_known_symbol("A", RangeConstraint::default());
        let b = Qse::from_known_symbol("B", RangeConstraint::default());
        let mut t: Qse = (x * y + a) * b;
        assert_eq!(t.to_string(), "(B * X) * (Y) + (A * B)");
        t.substitute_by_known(
            &"B",
            &SymbolicExpression::from_symbol("B", RangeConstraint::from_value(7.into())),
        );
        assert!(t.is_quadratic());
        assert_eq!(t.to_string(), "(7 * X) * (Y) + (A * 7)");
        // Once X is known, the quadratic term degrades to a linear one.
        t.substitute_by_known(
            &"X",
            &SymbolicExpression::from_symbol("X", RangeConstraint::from_range(1.into(), 2.into())),
        );
        assert!(!t.is_quadratic());
        assert_eq!(t.to_string(), "(7 * X) * Y + (A * 7)");
        t.substitute_by_known(
            &"Y",
            &SymbolicExpression::from_symbol("Y", RangeConstraint::from_value(3.into())),
        );
        assert!(t.try_to_known().is_some());
        assert_eq!(t.to_string(), "((A * 7) + ((7 * X) * 3))");
    }

    #[test]
    fn test_apply_update_inner_zero() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_known_symbol("A", RangeConstraint::default());
        let b = Qse::from_known_symbol("B", RangeConstraint::default());
        let mut t: Qse = (x * a + y) * b;
        assert_eq!(t.to_string(), "(A * B) * X + B * Y");
        t.substitute_by_known(
            &"B",
            &SymbolicExpression::from_symbol("B", RangeConstraint::from_value(7.into())),
        );
        assert_eq!(t.to_string(), "(A * 7) * X + 7 * Y");
        // A known-zero coefficient erases the whole term.
        t.substitute_by_known(
            &"A",
            &SymbolicExpression::from_symbol("A", RangeConstraint::from_value(0.into())),
        );
        assert_eq!(t.to_string(), "7 * Y");
    }

    #[test]
    fn substitute_known() {
        let x = Qse::from_unknown_variable("X");
        let y = Qse::from_unknown_variable("Y");
        let a = Qse::from_known_symbol("A", RangeConstraint::default());
        let b = Qse::from_known_symbol("B", RangeConstraint::default());
        let mut t: Qse = (x * a + y) * b.clone() + b;
        assert_eq!(t.to_string(), "(A * B) * X + B * Y + B");
        // We substitute B by an expression containing B on purpose.
        t.substitute_by_known(
            &"B",
            &(SymbolicExpression::from_symbol("B", Default::default())
                + SymbolicExpression::from(GoldilocksField::from(1))),
        );
        assert_eq!(t.to_string(), "(A * (B + 1)) * X + (B + 1) * Y + (B + 1)");
        t.substitute_by_known(
            &"B",
            &SymbolicExpression::from_symbol("B", RangeConstraint::from_value(10.into())),
        );
        assert_eq!(t.to_string(), "(A * 11) * X + 11 * Y + 11");
    }

    #[test]
    fn test_substitute_by_unknown_basic_replacement() {
        let mut expr = var("a");
        let subst = var("x");
        expr.substitute_by_unknown(&"a", &subst);
        assert_eq!(expr.to_string(), "x");
    }

    #[test]
    fn test_substitute_by_unknown_linear_to_quadratic() {
        let mut expr = var("x");
        let subst = var("y") * var("z") + constant(3);
        expr.substitute_by_unknown(&"x", &subst);
        assert!(expr.is_quadratic());
        assert_eq!(expr.to_string(), "(y) * (z) + 3");
    }

    #[test]
    fn test_substitute_by_unknown_inside_quadratic() {
        let mut expr = var("x") * var("y");
        let subst = var("a") + constant(1);
        expr.substitute_by_unknown(&"x", &subst);
        assert!(expr.is_quadratic());
        assert_eq!(expr.to_string(), "(a + 1) * (y)");
    }

    #[test]
    fn test_substitute_by_unknown_linear() {
        let mut expr = var("x") + var("y");
        let subst = var("a") + var("b");
        expr.substitute_by_unknown(&"x", &subst);
        assert!(!expr.is_quadratic());
        assert_eq!(expr.linear_components().count(), 3);
        assert_eq!(expr.to_string(), "a + b + y");
    }

    #[test]
    fn test_complex_expression_multiple_substitution() {
        let mut expr = (var("x") * var("w")) + var("x") + constant(3) * var("y") + constant(5);
        assert_eq!(expr.to_string(), "(x) * (w) + x + 3 * y + 5");
        let subst = var("a") * var("b") + constant(1);
        expr.substitute_by_unknown(&"x", &subst);
        assert_eq!(
            expr.to_string(),
            "((a) * (b) + 1) * (w) + (a) * (b) + 3 * y + 6"
        );
        // Structural validation
        let [first_quadratic, second_quadratic] = expr
            .quadratic_components()
            .iter()
            .cloned()
            .collect_vec()
            .try_into()
            .unwrap();
        assert_eq!(first_quadratic.0.to_string(), "(a) * (b) + 1");
        let inner_quadratic = first_quadratic.0.quadratic_components();
        assert_eq!(inner_quadratic[0].0.to_string(), "a");
        assert_eq!(inner_quadratic[0].1.to_string(), "b");
        assert!(first_quadratic.0.linear_components().count() == 0);
        assert_eq!(
            first_quadratic.0.constant_offset().try_to_number(),
            Some(GoldilocksField::from(1)),
        );
        assert_eq!(first_quadratic.1.to_string(), "w");
        assert_eq!(second_quadratic.0.to_string(), "a");
        assert_eq!(second_quadratic.1.to_string(), "b");
        let [linear] = expr.linear_components().collect_vec().try_into().unwrap();
        assert_eq!(linear.0.to_string(), "y");
        assert_eq!(
            expr.constant_offset().try_to_number(),
            Some(GoldilocksField::from(6)),
        );
    }

    #[test]
    fn test_substitute_by_unknown_coeff_distribution() {
        let mut expr = constant(2) * var("a") + constant(7);
        assert_eq!(expr.to_string(), "2 * a + 7");
        let subst = var("x") * var("y");
        expr.substitute_by_unknown(&"a", &subst);
        // The coefficient 2 is folded into the first factor of the new quadratic term.
        assert_eq!(expr.to_string(), "(2 * x) * (y) + 7");
        let quadratic = expr.quadratic_components();
        assert_eq!(quadratic.len(), 1);
        assert_eq!(quadratic[0].0.to_string(), "2 * x");
        assert_eq!(quadratic[0].1.to_string(), "y");
        assert!(expr.linear_components().next().is_none());
        assert_eq!(
            expr.constant_offset().try_to_number(),
            Some(GoldilocksField::from(7))
        );
    }

    #[test]
    fn combine_removing_zeros() {
        let a = var("x") * var("y") + var("z") * constant(3);
        let b = var("t") * var("u") + constant(5) + var("y") * var("x");
        // `(x) * (y)` and `(y) * (x)` cancel in both subtraction orders.
        assert_eq!(
            (a.clone() - b.clone()).to_string(),
            "-((t) * (u) - 3 * z + 5)"
        );
        assert_eq!((b - a).to_string(), "(t) * (u) - 3 * z + 5");
    }

    #[test]
    fn remove_quadratic_zeros_after_substitution() {
        let a = var("x") * var("r") + var("z") * constant(3);
        let b = var("t") * var("u") + constant(5) + var("y") * var("x");
        let mut t = b - a;
        // Cannot simplify yet, because the terms are different
        assert_eq!(
            t.to_string(),
            "(t) * (u) + (y) * (x) - (x) * (r) - 3 * z + 5"
        );
        t.substitute_by_unknown(&"r", &var("y"));
        // Now the first term in `a` is equal to the last in `b`.
        assert_eq!(t.to_string(), "(t) * (u) - 3 * z + 5");
    }

    #[test]
    fn to_factors() {
        let expr = (constant(3) * var("x"))
            * -var("y")
            * constant(3)
            * (constant(5) * var("z") + constant(5))
            * (constant(2) * var("t") + constant(4) * var("z"))
            * (var("t") * constant(2));
        assert_eq!(
            expr.to_string(),
            "-(((((9 * x) * (y)) * (5 * z + 5)) * (2 * t + 4 * z)) * (2 * t))"
        );
        // Constant factors are dropped and each remaining factor is normalized
        // (e.g. `5 * z + 5` -> `z + 1`).
        let factors = expr.to_factors().into_iter().format(", ").to_string();
        assert_eq!(factors, "x, y, z + 1, t + 2 * z, t");
    }

    #[test]
    fn rc_of_square() {
        let expr = (var("x") * var("x")) + constant(3);
        let rc1 = HashMap::from([("x", RangeConstraint::from_range(1.into(), 2.into()))]);
        expect!("[4, 7] & 0x7").assert_eq(&expr.range_constraint(&rc1).to_string());
        // Ranges straddling zero yield the same constraint regardless of which
        // side is larger in absolute value.
        let rc2 = HashMap::from([(
            "x",
            RangeConstraint::from_range(-GoldilocksField::from(5), 3.into()),
        )]);
        expect!("[3, 28] & 0x1f").assert_eq(&expr.range_constraint(&rc2).to_string());
        let rc3 = HashMap::from([(
            "x",
            RangeConstraint::from_range(-GoldilocksField::from(3), 5.into()),
        )]);
        expect!("[3, 28] & 0x1f").assert_eq(&expr.range_constraint(&rc3).to_string());
    }

    #[test]
    fn serialize_sum() {
        let expr = [1, 2, 3];
        let serialized = serde_json::to_string(&SumSerializer::new(&expr)).unwrap();
        expect!(r#"[[1,"+",2],"+",3]"#).assert_eq(&serialized);
        // A single-element sum serializes as the element itself.
        let expr = [1];
        let serialized = serde_json::to_string(&SumSerializer::new(&expr)).unwrap();
        expect!("1").assert_eq(&serialized);
    }

    #[test]
    fn serialize_grouped_expression() {
        let x: GroupedExpression = GroupedExpression::from_unknown_variable("X");
        let four = GroupedExpression::from_runtime_constant(GoldilocksField::from(4));
        let expr = four.clone() * (x.clone() * x.clone()) + four.clone() * x.clone() + four;
        let serialized = serde_json::to_string(&expr).unwrap();
        expect!([r#"[[[[4,"*","X"],"*","X"],"+",[4,"*","X"]],"+",4]"#]).assert_eq(&serialized);
    }

    #[test]
    fn serialize_zero() {
        let expr: GroupedExpression = GroupedExpression::zero();
        let serialized = serde_json::to_string(&expr).unwrap();
expect!("0").assert_eq(&serialized);
    }
}


================================================
FILE: constraint-solver/src/indexed_constraint_system.rs
================================================
use std::{
    cmp,
    collections::{BTreeSet, HashMap, VecDeque},
    fmt::Display,
    hash::Hash,
};

use bitvec::vec::BitVec;
use derivative::Derivative;
use itertools::Itertools;

use crate::{
    constraint_system::{
        AlgebraicConstraint, BusInteraction, ConstraintRef, ConstraintSystem, DerivedVariable,
    },
    grouped_expression::GroupedExpression,
    runtime_constant::{RuntimeConstant, Substitutable},
};

/// Applies multiple substitutions to a ConstraintSystem in an efficient manner.
pub fn apply_substitutions, V: Hash + Eq + Clone + Ord>(
    constraint_system: ConstraintSystem,
    substitutions: impl IntoIterator)>,
) -> ConstraintSystem {
    // Build the occurrence index once, apply all substitutions, then unwrap.
    let mut indexed_constraint_system = IndexedConstraintSystem::from(constraint_system);
    indexed_constraint_system.apply_substitutions(substitutions);
    indexed_constraint_system.into()
}

/// Applies multiple substitutions to all expressions in a sequence of expressions.
pub fn apply_substitutions_to_expressions<
    T: RuntimeConstant + Substitutable,
    V: Hash + Eq + Clone + Ord,
>(
    expressions: impl IntoIterator>,
    substitutions: impl IntoIterator)>,
) -> Vec> {
    // Wrap the expressions in a throw-away constraint system so the indexed
    // substitution machinery can be reused, then unwrap the expressions again.
    apply_substitutions(
        ConstraintSystem {
            algebraic_constraints: expressions
                .into_iter()
                .map(AlgebraicConstraint::assert_zero)
                .collect(),
            bus_interactions: Vec::new(),
            derived_variables: Vec::new(),
        },
        substitutions,
    )
    .algebraic_constraints
    .into_iter()
    .map(|constraint| constraint.expression)
    .collect()
}

/// Structure on top of a [`ConstraintSystem`] that stores indices
/// to more efficiently update the constraints.
#[derive(Derivative)]
#[derivative(Default(bound = ""), Clone)]
pub struct IndexedConstraintSystem {
    /// The constraint system.
    constraint_system: ConstraintSystem,
    /// Stores where each unknown variable appears.
variable_occurrences: HashMap>,
}

/// Structure on top of [`IndexedConstraintSystem`] that
/// tracks changes to variables and how they may affect constraints.
///
/// In particular, the assumption is that items in the constraint system
/// need to be "handled". Initially, all items need to be "handled"
/// and are put in a queue. Handling an item can cause an update to a variable,
/// which causes all constraints referencing that variable to be put back into the
/// queue.
#[derive(Derivative)]
#[derivative(Default(bound = ""), Clone)]
pub struct IndexedConstraintSystemWithQueue {
    constraint_system: IndexedConstraintSystem,
    queue: ConstraintSystemQueue,
}

/// A reference to an item in the constraint system, based on the index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
enum ConstraintSystemItem {
    /// A reference to an algebraic constraint.
    AlgebraicConstraint(usize),
    /// A reference to a bus interaction.
    BusInteraction(usize),
    /// A reference to a derived variable. This is only used internally to the
    /// IndexedConstraintSystem.
    DerivedVariable(usize),
}

impl ConstraintSystemItem {
    /// Returns an index that is unique across both algebraic constraints and bus interactions.
    /// Panics for derived variables.
    fn flat_constraint_id(&self) -> usize {
        // Even ids are algebraic constraints, odd ids are bus interactions.
        match self {
            ConstraintSystemItem::AlgebraicConstraint(i) => 2 * i,
            ConstraintSystemItem::BusInteraction(i) => 2 * i + 1,
            ConstraintSystemItem::DerivedVariable(_) => panic!(),
        }
    }

    /// Returns the index of the item. Note that the indices are not disjoint between different kinds
    /// of items.
    fn index(&self) -> usize {
        match self {
            ConstraintSystemItem::AlgebraicConstraint(index)
            | ConstraintSystemItem::BusInteraction(index)
            | ConstraintSystemItem::DerivedVariable(index) => *index,
        }
    }

    /// Returns true if this constraint system item is a derived variable instead of an actual constraint.
fn is_derived_variable(&self) -> bool {
        matches!(self, ConstraintSystemItem::DerivedVariable(_))
    }

    /// Turns this indexed-based item into a reference to the actual constraint.
    /// Fails for derived variables.
    fn try_to_constraint_ref<'a, T, V>(
        self,
        constraint_system: &'a ConstraintSystem,
    ) -> Option> {
        match self {
            ConstraintSystemItem::AlgebraicConstraint(i) => {
                Some(ConstraintRef::AlgebraicConstraint(
                    constraint_system.algebraic_constraints[i].as_ref(),
                ))
            }
            ConstraintSystemItem::BusInteraction(i) => Some(ConstraintRef::BusInteraction(
                &constraint_system.bus_interactions[i],
            )),
            ConstraintSystemItem::DerivedVariable(_) => None,
        }
    }
}

impl From> for IndexedConstraintSystem {
    fn from(constraint_system: ConstraintSystem) -> Self {
        // Build the variable -> occurrence index once, up front.
        let variable_occurrences = variable_occurrences(&constraint_system);
        IndexedConstraintSystem {
            constraint_system,
            variable_occurrences,
        }
    }
}

impl From> for ConstraintSystem {
    fn from(indexed_constraint_system: IndexedConstraintSystem) -> Self {
        indexed_constraint_system.constraint_system
    }
}

impl IndexedConstraintSystem {
    /// Returns the underlying constraint system.
    pub fn system(&self) -> &ConstraintSystem {
        &self.constraint_system
    }

    /// Returns the algebraic constraints of the system.
    pub fn algebraic_constraints(&self) -> &[AlgebraicConstraint>] {
        &self.constraint_system.algebraic_constraints
    }

    /// Returns the bus interactions of the system.
    pub fn bus_interactions(&self) -> &[BusInteraction>] {
        &self.constraint_system.bus_interactions
    }

    /// Returns all (unknown) variables in the system. Might contain variables
    /// that do not appear in the system any more (because the constraints were deleted).
    /// Does not contain repetitions and is very efficient but returns the variables in a
    /// non-deterministic order.
    pub fn variables(&self) -> impl Iterator {
        self.variable_occurrences.keys()
    }

    /// Returns all (unknown) variables that occur in the system in a deterministic order
    /// but might contain repetitions.
pub fn referenced_unknown_variables(&self) -> impl Iterator {
        self.constraint_system.referenced_unknown_variables()
    }

    /// Removes all constraints that do not fulfill the predicate.
    pub fn retain_algebraic_constraints(
        &mut self,
        mut f: impl FnMut(&AlgebraicConstraint>) -> bool,
    ) {
        retain(
            &mut self.constraint_system.algebraic_constraints,
            &mut self.variable_occurrences,
            &mut f,
            ConstraintSystemItem::AlgebraicConstraint,
        );
    }

    /// Removes all bus interactions that do not fulfill the predicate.
    pub fn retain_bus_interactions(
        &mut self,
        mut f: impl FnMut(&BusInteraction>) -> bool,
    ) {
        retain(
            &mut self.constraint_system.bus_interactions,
            &mut self.variable_occurrences,
            &mut f,
            ConstraintSystemItem::BusInteraction,
        );
    }

    /// Removes all derived variables that do not fulfill the predicate.
    pub fn retain_derived_variables(
        &mut self,
        mut f: impl FnMut(&DerivedVariable>) -> bool,
    ) {
        retain(
            &mut self.constraint_system.derived_variables,
            &mut self.variable_occurrences,
            &mut f,
            ConstraintSystemItem::DerivedVariable,
        );
    }
}

/// Behaves like `list.retain(f)` but also updates the variable occurrences
/// in `occurrences`. Note that `constraint_kind_constructor` is used to
/// create the `ConstraintSystemItem` for the occurrences, so it should
/// match the type of the items in `list`.
fn retain(
    list: &mut Vec,
    occurrences: &mut HashMap>,
    mut f: impl FnMut(&Item) -> bool,
    constraint_kind_constructor: impl Fn(usize) -> ConstraintSystemItem + Copy,
) {
    let mut counter = 0usize;
    // `replacement_map[i]` = `Some(j)` if item at index `i` is kept and is now at index `j`
    let mut replacement_map = vec![];
    list.retain(|c| {
        let retain = f(c);
        if retain {
            replacement_map.push(Some(counter));
            counter += 1;
        } else {
            replacement_map.push(None);
        }
        retain
    });
    assert_eq!(counter, list.len());
    // We call it once on zero just to find out which enum variant it returns,
    // so we can compare the discriminants below.
let discriminant = std::mem::discriminant(&constraint_kind_constructor(0));
    occurrences.values_mut().for_each(|occurrences| {
        *occurrences = occurrences
            .iter()
            .filter_map(|item| {
                if std::mem::discriminant(item) == discriminant {
                    // We have an item of the kind we are modifying, so apply
                    // the replacement map
                    replacement_map[item.index()].map(constraint_kind_constructor)
                } else {
                    // This is a constraint of the wrong kind, do not modify it.
                    Some(*item)
                }
            })
            .collect();
    });
    // Drop variables that no longer occur in any item.
    occurrences.retain(|_, occurrences| !occurrences.is_empty());
}

impl IndexedConstraintSystem {
    /// Adds new algebraic constraints to the system.
    pub fn add_algebraic_constraints(
        &mut self,
        constraints: impl IntoIterator>>,
    ) {
        self.extend(ConstraintSystem {
            algebraic_constraints: constraints.into_iter().collect(),
            bus_interactions: Vec::new(),
            derived_variables: Vec::new(),
        });
    }

    /// Adds new bus interactions to the system.
    pub fn add_bus_interactions(
        &mut self,
        bus_interactions: impl IntoIterator>>,
    ) {
        self.extend(ConstraintSystem {
            algebraic_constraints: Vec::new(),
            bus_interactions: bus_interactions.into_iter().collect(),
            derived_variables: Vec::new(),
        });
    }

    /// Extends the constraint system by the constraints of another system.
    pub fn extend(&mut self, system: ConstraintSystem) {
        let algebraic_constraint_count = self.constraint_system.algebraic_constraints.len();
        let bus_interactions_count = self.constraint_system.bus_interactions.len();
        let derived_variables_count = self.constraint_system.derived_variables.len();
        // Compute the occurrences of the variables in the new constraints,
        // but update their indices.
        // Iterating over hash map here is fine because we are just extending another hash map.
        #[allow(clippy::iter_over_hash_type)]
        for (variable, occurrences) in variable_occurrences(&system) {
            // Shift the indices of the new items past the existing items of
            // the same kind.
            let occurrences = occurrences.into_iter().map(|item| match item {
                ConstraintSystemItem::AlgebraicConstraint(i) => {
                    ConstraintSystemItem::AlgebraicConstraint(i + algebraic_constraint_count)
                }
                ConstraintSystemItem::BusInteraction(i) => {
                    ConstraintSystemItem::BusInteraction(i + bus_interactions_count)
                }
                ConstraintSystemItem::DerivedVariable(i) => {
                    ConstraintSystemItem::DerivedVariable(i + derived_variables_count)
                }
            });
            self.variable_occurrences
                .entry(variable)
                .or_default()
                .extend(occurrences);
        }
        self.constraint_system.extend(system)
    }
}

impl IndexedConstraintSystem {
    /// Returns a list of all constraints that contain at least one of the given variables.
    pub fn constraints_referencing_variables<'a>(
        &'a self,
        variables: impl IntoIterator + 'a,
    ) -> impl Iterator> + 'a {
        variables
            .into_iter()
            .filter_map(|v| self.variable_occurrences.get(v))
            .flatten()
            .unique()
            // Derived variables are filtered out here because they have no
            // constraint representation.
            .flat_map(|&item| item.try_to_constraint_ref(&self.constraint_system))
    }
}

impl, V: Clone + Hash + Ord + Eq> IndexedConstraintSystem {
    /// Substitutes a variable with a symbolic expression in the whole system
    pub fn substitute_by_known(&mut self, variable: &V, substitution: &T) {
        // Since we substitute by a known value, we do not need to update variable_occurrences.
        for item in self
            .variable_occurrences
            .get(variable)
            .unwrap_or(&BTreeSet::new())
        {
            substitute_by_known_in_item(&mut self.constraint_system, *item, variable, substitution);
        }
    }

    /// Substitute an unknown variable by a GroupedExpression in the whole system.
    ///
    /// Note this does NOT work properly if the variable is used inside a
    /// known SymbolicExpression.
    ///
    /// It does not delete the occurrence of `variable` so that it can be used to check
    /// which constraints it used to occur in.
pub fn substitute_by_unknown(&mut self, variable: &V, substitution: &GroupedExpression) {
        let items = self
            .variable_occurrences
            .get(variable)
            .cloned()
            .unwrap_or(BTreeSet::new());
        for item in &items {
            substitute_by_unknown_in_item(
                &mut self.constraint_system,
                *item,
                variable,
                substitution,
            );
        }
        // We just add all variables in the substitution to the items.
        // It might be that cancellations occur, but we assume it is not worth the overhead.
        for var in substitution.referenced_unknown_variables().unique() {
            self.variable_occurrences
                .entry(var.clone())
                .or_default()
                .extend(items.iter().cloned());
        }
    }

    /// Applies multiple substitutions to the constraint system in an efficient manner.
    pub fn apply_substitutions(
        &mut self,
        substitutions: impl IntoIterator)>,
    ) {
        // We do not track substitutions yet, but we could.
        for (variable, substitution) in substitutions {
            self.substitute_by_unknown(&variable, &substitution);
        }
    }
}

/// Returns a hash map mapping all unknown variables in the constraint system
/// to the items they occur in.
fn variable_occurrences(
    constraint_system: &ConstraintSystem,
) -> HashMap> {
    let occurrences_in_algebraic_constraints = constraint_system
        .algebraic_constraints
        .iter()
        .enumerate()
        .flat_map(|(i, constraint)| {
            constraint
                .referenced_unknown_variables()
                .unique()
                .map(move |v| (v.clone(), ConstraintSystemItem::AlgebraicConstraint(i)))
        });
    let occurrences_in_bus_interactions = constraint_system
        .bus_interactions
        .iter()
        .enumerate()
        .flat_map(|(i, bus_interaction)| {
            bus_interaction
                .fields()
                .flat_map(|c| c.referenced_unknown_variables())
                .unique()
                .map(move |v| (v.clone(), ConstraintSystemItem::BusInteraction(i)))
        });
    let occurrences_in_derived_variables = constraint_system
        .derived_variables
        .iter()
        .enumerate()
        // We ignore the derived variable itself because it is not a constraint
        // and does not matter in substitutions (if we substitute the derived
        // variable it is deleted in a later step).
        .flat_map(
            |(
                i,
                DerivedVariable {
                    computation_method, ..
                },
            )| {
                computation_method
                    .referenced_unknown_variables()
                    .unique()
                    .map(move |v| (v.clone(), ConstraintSystemItem::DerivedVariable(i)))
            },
        );
    occurrences_in_algebraic_constraints
        .chain(occurrences_in_bus_interactions)
        .chain(occurrences_in_derived_variables)
        .into_grouping_map()
        .collect()
}

/// Applies a known-value substitution to a single item of the system.
fn substitute_by_known_in_item, V: Ord + Clone + Eq>(
    constraint_system: &mut ConstraintSystem,
    item: ConstraintSystemItem,
    variable: &V,
    substitution: &T,
) {
    match item {
        ConstraintSystemItem::AlgebraicConstraint(i) => {
            constraint_system.algebraic_constraints[i]
                .expression
                .substitute_by_known(variable, substitution);
        }
        ConstraintSystemItem::BusInteraction(i) => {
            constraint_system.bus_interactions[i]
                .fields_mut()
                .for_each(|expr| expr.substitute_by_known(variable, substitution));
        }
        ConstraintSystemItem::DerivedVariable(i) => constraint_system.derived_variables[i]
            .computation_method
            .substitute_by_known(variable, substitution),
    }
}

/// Applies an unknown-expression substitution to a single item of the system.
fn substitute_by_unknown_in_item, V: Ord + Clone + Eq>(
    constraint_system: &mut ConstraintSystem,
    item: ConstraintSystemItem,
    variable: &V,
    substitution: &GroupedExpression,
) {
    match item {
        ConstraintSystemItem::AlgebraicConstraint(i) => {
            constraint_system.algebraic_constraints[i]
                .expression
                .substitute_by_unknown(variable, substitution);
        }
        ConstraintSystemItem::BusInteraction(i) => {
            constraint_system.bus_interactions[i]
                .fields_mut()
                .for_each(|expr| expr.substitute_by_unknown(variable, substitution));
        }
        ConstraintSystemItem::DerivedVariable(i) => constraint_system.derived_variables[i]
            .computation_method
            .substitute_by_unknown(variable, substitution),
    }
}

impl Display for IndexedConstraintSystem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.constraint_system)
    }
}

impl>> From for IndexedConstraintSystemWithQueue {
    fn from(constraint_system: C) -> Self {
        // Initially, all constraints are put into the queue.
        let constraint_system = constraint_system.into();
        let queue =
ConstraintSystemQueue::new(constraint_system.system());
        Self {
            constraint_system,
            queue,
        }
    }
}

impl IndexedConstraintSystemWithQueue
where
    T: RuntimeConstant + Substitutable,
    V: Clone + Ord + Hash,
{
    /// Returns a reference to the underlying indexed constraint system.
    pub fn system(&self) -> &IndexedConstraintSystem {
        &self.constraint_system
    }

    /// Removes the next item from the queue and returns it.
    pub fn pop_front<'a>(&'a mut self) -> Option> {
        self.queue.pop_front().map(|item| {
            item.try_to_constraint_ref(&self.constraint_system.constraint_system)
                // Derived variables should never be in the queue.
                .unwrap()
        })
    }

    /// Notifies the system that a variable has been updated and causes all constraints
    /// referencing that variable to be put back into the queue.
    ///
    /// Note that this function does not have to be called if the system is modified directly.
    pub fn variable_updated(&mut self, variable: &V) {
        if let Some(items) = self.constraint_system.variable_occurrences.get(variable) {
            for item in items {
                // Derived variables are not constraints and thus never queued.
                if !item.is_derived_variable() {
                    self.queue.push(*item);
                }
            }
        }
    }

    /// Substitutes a variable with an expression (which may contain unknowns)
    /// in the whole system.
    /// This function also updates the queue accordingly.
    ///
    /// It does not delete the occurrence of `variable` so that it can be used to check
    /// which constraints it used to occur in.
    pub fn substitute_by_unknown(&mut self, variable: &V, substitution: &GroupedExpression) {
        self.constraint_system
            .substitute_by_unknown(variable, substitution);
        // Re-queue all constraints that (used to) reference the variable.
        self.variable_updated(variable);
    }

    /// Adds new algebraic constraints to the system and enqueues them.
    pub fn add_algebraic_constraints(
        &mut self,
        constraints: impl IntoIterator>>,
    ) {
        let initial_len = self
            .constraint_system
            .constraint_system
            .algebraic_constraints
            .len();
        self.constraint_system
            .add_algebraic_constraints(constraints.into_iter().enumerate().map(|(i, c)| {
                // Enqueue each new constraint as it is added.
                self.queue
                    .push(ConstraintSystemItem::AlgebraicConstraint(initial_len + i));
                c
            }));
    }

    /// Adds new bus interactions to the system and enqueues them.
    pub fn add_bus_interactions(
        &mut self,
        bus_interactions: impl IntoIterator>>,
    ) {
        let initial_len = self
            .constraint_system
            .constraint_system
            .bus_interactions
            .len();
        self.constraint_system
            .add_bus_interactions(bus_interactions.into_iter().enumerate().map(|(i, c)| {
                self.queue
                    .push(ConstraintSystemItem::BusInteraction(initial_len + i));
                c
            }));
    }

    /// Removes all algebraic constraints that do not fulfill the predicate.
    pub fn retain_algebraic_constraints(
        &mut self,
        mut f: impl FnMut(&AlgebraicConstraint>) -> bool,
    ) {
        self.constraint_system.retain_algebraic_constraints(&mut f);
        if !self.queue.queue.is_empty() {
            // Removing items will destroy the indices, which is only safe if
            // the queue is empty. Otherwise, we just put all items back into the queue.
            self.queue = ConstraintSystemQueue::new(self.constraint_system.system());
        }
    }

    /// Removes all bus interactions that do not fulfill the predicate.
    pub fn retain_bus_interactions(
        &mut self,
        mut f: impl FnMut(&BusInteraction>) -> bool,
    ) {
        self.constraint_system.retain_bus_interactions(&mut f);
        if !self.queue.queue.is_empty() {
            // Removing items will destroy the indices, which is only safe if
            // the queue is empty. Otherwise, we just put all items back into the queue.
            self.queue = ConstraintSystemQueue::new(self.constraint_system.system());
        }
    }
}

impl Display for IndexedConstraintSystemWithQueue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.constraint_system)
    }
}

/// The actual queue used in `IndexedConstraintSystemWithQueue`.
/// /// It keeps track that there are no duplicates in the queue by maintaining /// a flat bitvector of items in the queue. #[derive(Default, Clone)] struct ConstraintSystemQueue { queue: VecDeque, in_queue: BitVec, } impl ConstraintSystemQueue { fn new(constraint_system: &ConstraintSystem) -> Self { let num_algebraic = constraint_system.algebraic_constraints.len(); let num_bus = constraint_system.bus_interactions.len(); let queue = (0..num_algebraic) .map(ConstraintSystemItem::AlgebraicConstraint) .chain((0..num_bus).map(ConstraintSystemItem::BusInteraction)) .collect::>() .into(); // The maximum value of `item.flat_id()` is `2 * max(num_algebraic, num_bus) + 1` let mut in_queue = BitVec::repeat(false, 2 * cmp::max(num_algebraic, num_bus) + 2); for item in &queue { let item: &ConstraintSystemItem = item; in_queue.set(item.flat_constraint_id(), true); } Self { queue, in_queue } } fn push(&mut self, item: ConstraintSystemItem) { assert!(!item.is_derived_variable()); if self.in_queue.len() <= item.flat_constraint_id() { self.in_queue.resize(item.flat_constraint_id() + 1, false); } if !self.in_queue[item.flat_constraint_id()] { self.queue.push_back(item); self.in_queue.set(item.flat_constraint_id(), true); } } fn pop_front(&mut self) -> Option { let item = self.queue.pop_front(); if let Some(item) = &item { self.in_queue.set(item.flat_constraint_id(), false); } item } } #[cfg(test)] mod tests { use powdr_number::GoldilocksField; use crate::constraint_system::ComputationMethod; use super::*; fn format_system(s: &IndexedConstraintSystem) -> String { format!( "{} | {}", s.algebraic_constraints().iter().format(" | "), s.bus_interactions() .iter() .map( |BusInteraction { bus_id, payload, multiplicity, }| format!( "{bus_id}: {multiplicity} * [{}]", payload.iter().format(", ") ) ) .format(" | ") ) } #[test] fn substitute_by_unknown() { type Ge = GroupedExpression; let x = Ge::from_unknown_variable("x"); let y = Ge::from_unknown_variable("y"); let z = 
Ge::from_unknown_variable("z"); let mut s: IndexedConstraintSystem<_, _> = ConstraintSystem::default() .with_constraints(vec![ x.clone() + y.clone(), x.clone() - z.clone(), y.clone() - z.clone(), ]) .with_bus_interactions(vec![BusInteraction { bus_id: x, payload: vec![y.clone(), z], multiplicity: y, }]) .into(); s.substitute_by_unknown(&"x", &Ge::from_unknown_variable("z")); assert_eq!( format_system(&s), "y + z = 0 | 0 = 0 | y - z = 0 | z: y * [y, z]" ); s.substitute_by_unknown( &"z", &(Ge::from_unknown_variable("x") + Ge::from_number(GoldilocksField::from(7))), ); assert_eq!( format_system(&s), "x + y + 7 = 0 | 0 = 0 | -(x - y + 7) = 0 | x + 7: y * [y, x + 7]" ); } #[test] fn retain_update_index() { type Ge = GroupedExpression; let x = Ge::from_unknown_variable("x"); let y = Ge::from_unknown_variable("y"); let z = Ge::from_unknown_variable("z"); let mut s: IndexedConstraintSystem<_, _> = ConstraintSystem::default() .with_constraints(vec![ x.clone() + y.clone(), x.clone() - z.clone(), y.clone() - z.clone(), ]) .with_bus_interactions(vec![ BusInteraction { bus_id: x.clone(), payload: vec![y.clone(), z], multiplicity: y, }, BusInteraction { bus_id: x.clone(), payload: vec![x.clone(), x.clone()], multiplicity: x, }, ]) .into(); s.retain_algebraic_constraints(|c| !c.referenced_unknown_variables().any(|v| *v == "y")); s.retain_bus_interactions(|b| { !b.fields() .any(|e| e.referenced_unknown_variables().any(|v| *v == "y")) }); assert_eq!(s.constraints_referencing_variables(&["y"]).count(), 0); let items_with_x = s .constraints_referencing_variables(&["x"]) .map(|c| match c { ConstraintRef::AlgebraicConstraint(expr) => expr.to_string(), ConstraintRef::BusInteraction(bus_interaction) => { format!( "{}: {} * [{}]", bus_interaction.bus_id, bus_interaction.multiplicity, bus_interaction.payload.iter().format(", ") ) } }) .format(", ") .to_string(); assert_eq!(items_with_x, "x - z = 0, x: x * [x, x]"); let items_with_z = s .constraints_referencing_variables(&["z"]) .map(|c| 
match c { ConstraintRef::AlgebraicConstraint(expr) => expr.to_string(), ConstraintRef::BusInteraction(bus_interaction) => { format!( "{}: {} * [{}]", bus_interaction.bus_id, bus_interaction.multiplicity, bus_interaction.payload.iter().format(", ") ) } }) .format(", ") .to_string(); assert_eq!(items_with_z, "x - z = 0"); } #[test] fn substitute_in_derived_columns() { let mut system: IndexedConstraintSystem<_, _> = ConstraintSystem:: { algebraic_constraints: vec![], bus_interactions: vec![], derived_variables: vec![ DerivedVariable::new( "d1", ComputationMethod::QuotientOrZero( GroupedExpression::from_unknown_variable("x1"), GroupedExpression::from_unknown_variable("x2"), ), ), DerivedVariable::new( "d2", ComputationMethod::QuotientOrZero( GroupedExpression::from_unknown_variable("y1"), GroupedExpression::from_unknown_variable("y2"), ), ), ], } .into(); // We first substitute `y2` by an expression that contains `x1` such that when we // substitute `x1` in the next step, `d2` has to be updated again. 
system.substitute_by_unknown( &"y2", &(GroupedExpression::from_unknown_variable("x1") + GroupedExpression::from_number(7.into())), ); system.substitute_by_known(&"x1", &1.into()); assert_eq!( format!("{system}"), "d1 := QuotientOrZero(1, x2)\nd2 := QuotientOrZero(y1, 8)" ); } } ================================================ FILE: constraint-solver/src/inliner.rs ================================================ use crate::constraint_system::{AlgebraicConstraint, ConstraintRef}; use crate::grouped_expression::GroupedExpression; use crate::indexed_constraint_system::IndexedConstraintSystem; use itertools::Itertools; use powdr_number::FieldElement; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, HashSet}; use std::fmt::Display; use std::hash::Hash; #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct DegreeBound { pub identities: usize, pub bus_interactions: usize, } /// Reduce variables in the constraint system by inlining them, /// if the callback `should_inline` returns true. /// Returns the modified system and a list of inlined variables and their expressions. 
pub fn replace_constrained_witness_columns< T: FieldElement, V: Ord + Clone + Hash + Eq + Display, >( mut constraint_system: IndexedConstraintSystem, should_inline: impl Fn(&V, &GroupedExpression, &IndexedConstraintSystem) -> bool, ) -> ( IndexedConstraintSystem, BTreeMap>, ) { let mut to_remove_idx = HashSet::new(); let mut substitutions = BTreeMap::new(); let constraint_count = constraint_system.algebraic_constraints().len(); loop { let inlined_vars_count = substitutions.len(); for curr_idx in (0..constraint_count).rev() { let constraint = &constraint_system.algebraic_constraints()[curr_idx]; for (var, expr) in find_inlinable_variables(constraint) { if should_inline(&var, &expr, &constraint_system) { log::trace!("Substituting {var} = {expr}"); log::trace!(" (from identity {constraint})"); constraint_system.substitute_by_unknown(&var, &expr); to_remove_idx.insert(curr_idx); substitutions.insert(var, expr); break; } } } if substitutions.len() == inlined_vars_count { // No more variables to inline break; } } // remove inlined constraints from system let mut counter = 0; constraint_system.retain_algebraic_constraints(|_| { let retain = !to_remove_idx.contains(&(counter)); counter += 1; retain }); // sanity check assert!(constraint_system .referenced_unknown_variables() .all(|var| { !substitutions.contains_key(var) })); (constraint_system, substitutions) } /// Returns an inlining discriminator that allows everything to be inlined as long as /// the given degree bound is not violated. 
/// Returns an inlining discriminator (suitable for `replace_constrained_witness_columns`)
/// that allows every substitution which keeps all constraint degrees within `degree_bound`.
pub fn inline_everything_below_degree_bound<T: FieldElement, V: Ord + Clone + Hash + Eq>(
    degree_bound: DegreeBound,
) -> impl Fn(&V, &GroupedExpression<T, V>, &IndexedConstraintSystem<T, V>) -> bool {
    move |var, expr, constraint_system| {
        substitution_would_not_violate_degree_bound(var, expr, constraint_system, degree_bound)
    }
}

/// Returns true if substituting `var` by `expr` inside `constraint_system` would
/// not create new constraints with a degree larger than `degree_bound`.
///
/// Only constraints actually referencing `var` are inspected; identities and
/// bus interactions are checked against their respective bounds.
pub fn substitution_would_not_violate_degree_bound<T: FieldElement, V: Ord + Clone + Hash + Eq>(
    var: &V,
    expr: &GroupedExpression<T, V>,
    constraint_system: &IndexedConstraintSystem<T, V>,
    degree_bound: DegreeBound,
) -> bool {
    let replacement_deg = expr.degree();
    constraint_system
        .constraints_referencing_variables(std::iter::once(var))
        .all(|cref| match cref {
            ConstraintRef::AlgebraicConstraint(identity) => {
                let degree = expression_degree_with_virtual_substitution(
                    identity.expression,
                    var,
                    replacement_deg,
                );
                degree <= degree_bound.identities
            }
            ConstraintRef::BusInteraction(interaction) => interaction.fields().all(|expr| {
                let degree =
                    expression_degree_with_virtual_substitution(expr, var, replacement_deg);
                degree <= degree_bound.bus_interactions
            }),
        })
}

/// Returns substitutions of variables that appear linearly in `constraint`
/// and do not depend on themselves (i.e. the solved right-hand side does not
/// reference the solved-for variable).
fn find_inlinable_variables<T: FieldElement, V: Ord + Clone + Hash + Eq>(
    constraint: &AlgebraicConstraint<GroupedExpression<T, V>>,
) -> Vec<(V, GroupedExpression<T, V>)> {
    constraint
        .expression
        .linear_components()
        .rev()
        .filter_map(|(target_var, _)| {
            // `try_solve_for` fails e.g. if the coefficient is not invertible
            // or the variable also occurs in a quadratic component.
            let rhs_expr = constraint.as_ref().try_solve_for(target_var)?;
            assert!(!rhs_expr
                .referenced_unknown_variables()
                .contains(target_var));
            Some((target_var.clone(), rhs_expr))
        })
        .collect()
}

/// Calculate the degree of a GroupedExpression assuming a variable is
/// replaced by an expression of known degree.
fn expression_degree_with_virtual_substitution( expr: &GroupedExpression, var: &V, replacement_deg: usize, ) -> usize { let quadratic = expr.quadratic_components(); let linear = expr.linear_components(); quadratic .iter() .map(|(l, r)| { expression_degree_with_virtual_substitution(l, var, replacement_deg) + expression_degree_with_virtual_substitution(r, var, replacement_deg) }) .chain(linear.map(|(v, _)| if v == var { replacement_deg } else { 1 })) .max() .unwrap_or(0) } #[cfg(test)] mod test { use crate::constraint_system::{BusInteraction, ConstraintSystem}; use super::*; use powdr_number::GoldilocksField; use test_log::test; pub fn var(name: &'static str) -> GroupedExpression { GroupedExpression::from_unknown_variable(name) } pub fn constant(value: u64) -> GroupedExpression { GroupedExpression::from_number(GoldilocksField::from(value)) } fn bounds( identities: usize, bus_interactions: usize, ) -> impl Fn(&V, &GroupedExpression, &IndexedConstraintSystem) -> bool { inline_everything_below_degree_bound(DegreeBound { identities, bus_interactions, }) } #[test] fn test_no_substitution() { let constraint_system = ConstraintSystem::default() .with_constraints(vec![ var("a") * var("b") + var("c") * var("d"), var("e") * var("e") - constant(2), ]) .into(); let (constraint_system, _) = replace_constrained_witness_columns(constraint_system, bounds(3, 3)); assert_eq!(constraint_system.algebraic_constraints().len(), 2); } #[test] fn test_replace_witness_columns() { // keep column result let bus_interactions = vec![BusInteraction { bus_id: constant(1), payload: vec![var("0result"), var("b")], multiplicity: constant(1), }]; let constraint_system = ConstraintSystem::default() .with_constraints(vec![ var("a") + var("b") + var("c"), var("b") + var("d") - constant(1), var("c") + var("b") + var("a") + var("d") - var("0result"), ]) .with_bus_interactions(bus_interactions) .into(); let (constraint_system, _) = replace_constrained_witness_columns(constraint_system, bounds(3, 3)); // 1) a 
+ b + c = 0 => a = -b - c // 2) b + d - 1 = 0 => d = -b + 1 // 3) c + b + a + d = result // =(1)=> c + b + (-b - c) + d // = (c - c) + (b - b) + d // = 0 + 0 + d // => result = d = -b + 1 // => b = -result + 1 assert_eq!(constraint_system.algebraic_constraints().len(), 0); let bus_interactions = constraint_system.bus_interactions(); let [BusInteraction { payload, .. }] = bus_interactions else { panic!(); }; let [result, b] = payload.as_slice() else { panic!(); }; assert_eq!(result.to_string(), "0result"); assert_eq!(b.to_string(), "-(0result - 1)"); } #[test] fn test_replace_witness_columns_with_multiplication() { let mut identities = Vec::new(); // a * b = c let constraint1 = var("c") - var("a") * var("b"); identities.push(constraint1); // b + d = 0 let constraint2 = var("b") + var("d"); identities.push(constraint2); // a + b + c + d - result = 0 let expr = var("a") + var("b") + var("c") + var("d"); let expr_constraint = expr.clone() - var("result"); identities.push(expr_constraint); // keep column `result` let bus_interactions = vec![BusInteraction { bus_id: constant(1), payload: vec![var("result")], multiplicity: constant(1), }]; let constraint_system = ConstraintSystem::default() .with_constraints(identities) .with_bus_interactions(bus_interactions) .into(); let (constraint_system, _) = replace_constrained_witness_columns(constraint_system, bounds(3, 3)); let constraints = constraint_system.algebraic_constraints(); assert_eq!(constraints.len(), 0); } #[test] fn test_replace_witness_columns_no_keep() { let mut identities = Vec::new(); // a * b = c let constraint1 = var("c") - var("a") * var("b"); identities.push(constraint1); // b + d = 0 let constraint2 = var("b") + var("d"); identities.push(constraint2); // c * d = e let constraint3 = var("e") - var("c") * var("d"); identities.push(constraint3); // a + b + c + d + e - result = 0 let expr = var("a") + var("b") + var("c") + var("d") + var("e"); let expr_constraint = expr.clone() - var("result"); 
identities.push(expr_constraint); // no columns to keep let constraint_system = ConstraintSystem::default() .with_constraints(identities) .into(); let (constraint_system, _) = replace_constrained_witness_columns(constraint_system, bounds(3, 3)); let constraints = constraint_system.algebraic_constraints(); assert_eq!(constraints.len(), 0); } #[test] fn test_replace_constrained_witness_suboptimal() { // Keep x and result let bus_interactions = vec![BusInteraction { bus_id: constant(1), payload: vec![var("result"), var("x")], multiplicity: constant(1), }]; let constraint_system = ConstraintSystem::default() .with_constraints(vec![ var("y") - (var("x") + constant(3)), var("z") - (var("y") + constant(2)), var("result") - (var("z") + constant(1)), ]) .with_bus_interactions(bus_interactions) .into(); let (constraint_system, _) = replace_constrained_witness_columns(constraint_system, bounds(3, 3)); // 1) y = x + 3 // 2) z = y + 2 ⇒ z = (x + 3) + 2 = x + 5 // 3) result = z + 1 ⇒ result = (x + 5) + 1 = x + 6 let bus_interactions = constraint_system.bus_interactions(); let [BusInteraction { payload, .. 
}] = bus_interactions else { panic!(); }; let [result, x] = payload.as_slice() else { panic!(); }; assert_eq!(result.to_string(), "result"); assert_eq!(x.to_string(), "result - 6"); } #[test] fn test_replace_constrained_witness_columns_max_degree_limit() { let constraint_system = ConstraintSystem::default() .with_constraints(vec![ var("a") - (var("b") + constant(1)), var("c") - (var("a") * var("a")), var("d") - (var("c") * var("a")), var("e") - (var("d") * var("a")), var("f") - (var("e") + constant(5)), var("result") - (var("f") * constant(2)), ]) .with_bus_interactions( // Get all variables vec![BusInteraction { bus_id: constant(1), payload: vec![ var("a"), var("b"), var("c"), var("d"), var("e"), var("f"), var("result"), ], multiplicity: constant(1), }], ) .into(); let (constraint_system, _) = replace_constrained_witness_columns(constraint_system, bounds(3, 3)); let constraints = constraint_system.algebraic_constraints(); let [identity] = constraints else { panic!(); }; let bus_interactions = constraint_system.bus_interactions(); let [BusInteraction { payload, .. }] = bus_interactions else { panic!(); }; let [a, b, c, d, e, f, result] = payload.as_slice() else { panic!(); }; assert_eq!(a.to_string(), "a"); assert_eq!(b.to_string(), "a - 1"); // From second identity: c = a * a // In-lining c would violate the degree bound, so it is kept as a symbol // with a constraint to enforce the equality. 
assert_eq!(c.to_string(), "c"); assert_eq!(identity.to_string(), "-((a) * (a) - c) = 0"); // From third identity: d = c * a assert_eq!(d.to_string(), "(c) * (a)"); // From fourth identity: e = d * a assert_eq!(e.to_string(), "((c) * (a)) * (a)"); // From fifth identity: f = e + 5 assert_eq!(f.to_string(), "((c) * (a)) * (a) + 5"); // From sixth identity: result = f * 2 assert_eq!(result.to_string(), "((2 * c) * (a)) * (a) + 10"); } #[test] fn test_inline_max_degree_suboptimal_greedy() { // Show how constraint order affects optimization results // Define the constraints in both orders let mut optimal_order_identities = Vec::new(); let mut suboptimal_order_identities = Vec::new(); // a = b * b * b let constraint1 = var("a") - var("b") * var("b") * var("b"); // b = c + d let constraint2 = var("b") - (var("c") + var("d")); // a * c * c = 10 let constraint3 = var("a") * var("c") * var("c") - constant(10); // c = d * d let constraint4 = var("c") - var("d") * var("d"); // a + b + c + d = 100 let constraint5 = var("a") + var("b") + var("c") + var("d") - constant(100); // Optimal order optimal_order_identities.push(constraint1.clone()); // a = b * b * b optimal_order_identities.push(constraint2.clone()); // b = c + d optimal_order_identities.push(constraint3.clone()); // a * c * c = 10 optimal_order_identities.push(constraint4.clone()); // c = d * d optimal_order_identities.push(constraint5.clone()); // a + b + c + d = 100 // Suboptimal order suboptimal_order_identities.push(constraint5.clone()); // a + b + c + d = 100 suboptimal_order_identities.push(constraint3.clone()); // a * c * c = 10 suboptimal_order_identities.push(constraint1.clone()); // a = b * b * b suboptimal_order_identities.push(constraint2.clone()); // b = c + d suboptimal_order_identities.push(constraint4.clone()); // c = d * d let optimal_system = ConstraintSystem::default() .with_constraints(optimal_order_identities) .into(); let suboptimal_system = ConstraintSystem::default() 
.with_constraints(suboptimal_order_identities) .into(); // Apply the same optimization to both systems let (optimal_system, _) = replace_constrained_witness_columns(optimal_system, bounds(5, 5)); let (suboptimal_system, _) = replace_constrained_witness_columns(suboptimal_system, bounds(5, 5)); // Assert the difference in optimization results assert_eq!(optimal_system.algebraic_constraints().len(), 3); assert_eq!(suboptimal_system.algebraic_constraints().len(), 4); } } ================================================ FILE: constraint-solver/src/lib.rs ================================================ //! Tooling used for analysis and solving of constraints. pub mod algebraic_constraint; pub mod bus_interaction_handler; pub mod constraint_system; pub mod effect; pub mod grouped_expression; pub mod indexed_constraint_system; pub mod inliner; pub mod range_constraint; pub mod reachability; pub mod rule_based_optimizer; pub mod runtime_constant; pub mod solver; pub mod symbolic_expression; pub mod system_splitter; pub mod test_utils; pub mod utils; pub mod variable_update; ================================================ FILE: constraint-solver/src/range_constraint.rs ================================================ use std::fmt::{Debug, Display, Formatter}; use std::{cmp, ops}; use num_traits::Zero; use powdr_number::{log2_exact, FieldElement, LargeInt}; /// In an abstract way, a RangeConstraint is just a set of values. It is mainly used to /// combine the effects of multiple AlgebraicConstraints on the same variable. /// /// Currently, we can represent interval ranges (both "wrapping" and "non-wrapping" ones) /// and bit masks. The actual constraint is the conjunction of the two. /// /// The idea behind wrapping intervals is that we want to represent both signed and /// unsigned numbers. Furthermore, by supporting wrapping intervals we do not lose /// any information when adding or substracting constants. 
/// /// The semantics and correctness of RangeConstraints is mainly defined by the following notion: /// /// We say a RangeConstraint `r` on an expression `e` is `valid` in a ConstraintSystem /// if for every satisfying assignment of the ConstraintSystem, the value of `e` /// under this assignment is allowed by `r`. /// /// All the operations on RangeConstraints (like combine_sum, conjunction, ...) preserve /// validity, i.e. if we have an expression `e1 + e2` and we know that `r1` is a valid /// RangeConstraint for `e1` and `r2` is a valid RangeConstraint for `e2`, then /// the result of `r1.combine_sum(r2)` is a valid RangeConstraint for `e1 + e2`. /// /// In particular, a fully unconstrained RangeConstraint is always valid for every expression. /// in this way, range constraints are an over-approximation, i.e. they can be less strict /// than the expressions they model. They might allow a value that is actually not /// possible, but if the range constraint disallows a value, this value is definitely /// not possible. This is consistent because e.g. an algebraic constraint in isolation /// also over-approximates in contrast to this constraint being in the context /// of the full system. /// /// Finally, please be aware that same constraint can have multiple representations. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] pub struct RangeConstraint { /// Bit-mask. A value `x` is allowed only if `x & mask == x` (when seen as unsigned integer). mask: T::Integer, /// Min-max inclusive range. Note that `max` can be smaller than `min`. In this case the range wraps. /// If min <= max (seen as unsigned integers), then the constraint on `x` is `min <= x && x <= max`. /// If min > max, then the constraint is `min <= x || x <= max`. min: T, max: T, } impl RangeConstraint { /// Constraint that allows no higher bits set than the one given /// (counting from zero). 
pub fn from_max_bit(max_bit: usize) -> Self { Self::from_mask(mask_from_bits::(max_bit + 1)) } /// Constraint that forces several bits to be set to zero. /// Semantics: x & mask == x pub fn from_mask>(mask: M) -> Self { let mask = mask.into(); let max = T::from(cmp::min(mask, (T::from(-1)).to_integer())); Self { mask, min: T::zero(), max, } } /// Constraint that only allows this exact value. pub fn from_value(value: T) -> Self { Self { mask: value.to_integer(), min: value, max: value, } } /// Constraint that allows the values `min`, `min + 1`, ..., `max`. /// Since this sequence can wrap around the field modulus, it means that /// `x` is allowed if and only if: /// - min <= x && x <= max if min <= max /// - min <= x || x <= max if min > max, #[inline] pub fn from_range(min: T, max: T) -> Self { let mask = if min <= max { mask_from_bits::(max.to_integer().num_bits()) } else { Self::unconstrained().mask }; Self { mask, min, max } } /// Returns a constraint that allows any value. pub fn unconstrained() -> Self { Self::from_range(T::from(0), T::from(-1)) } /// Returns true if the range constraint does not impose any /// restrictions on the values. pub fn is_unconstrained(&self) -> bool { let un = Self::unconstrained(); self.range_width() == un.range_width() && (self.mask & un.mask) == un.mask } /// Returns a bit mask. This might be drastically under-fitted in case /// the constraint is more resembling an interval. /// Semantics: X & mask == X holds for all possible values of X. pub fn mask(&self) -> &T::Integer { &self.mask } /// Returns the interval part [min..=max] of the Range Constraint. /// Note that `max` can be smaller than `min`. In this case the range wraps. /// Semantics, with (min, max) = range(): /// If min <= max, this means min <= x && x <= max. /// If min > max, this means min <= x || x <= max. 
pub fn range(&self) -> (T, T) { (self.min, self.max) } /// Returns the number of elements between the min and the max value, disregarding the mask and /// potentially other constraints. pub fn range_width(&self) -> T::Integer { range_width(self.min, self.max) } /// Returns (an upper bound for) the number of field elements included in the constraint. pub fn size_estimate(&self) -> T::Integer { self.range_width() } pub fn allows_value(&self, v: T) -> bool { let in_range = if self.min <= self.max { self.min <= v && v <= self.max } else { self.min <= v || v <= self.max }; let in_mask = v.to_integer() & self.mask == v.to_integer(); in_range && in_mask } /// The range constraint of the sum of two expressions: /// If `r1` is a valid RangeConstraint for `e1` and `r2` is a valid RangeConstraint for `e2`, /// then `r1.combine_sum(r2)` is a valid RangeConstraint for `e1 + e2`. pub fn combine_sum(&self, other: &Self) -> Self { let unconstrained = Self::unconstrained(); // TODO we could use "add_with_carry" to see if this created an overflow. // it might even be enough to check if certain bits are set in the masks. let mut mask = if self.mask.to_arbitrary_integer() + other.mask.to_arbitrary_integer() >= T::modulus().to_arbitrary_integer() { unconstrained.mask } else { // This could be made stricter. (self.mask + other.mask) | self.mask | other.mask }; let (min, max) = if self.range_width().to_arbitrary_integer() + other.range_width().to_arbitrary_integer() <= unconstrained.range_width().to_arbitrary_integer() { (self.min + other.min, self.max + other.max) } else { unconstrained.range() }; if min <= max { mask &= Self::from_range(min, max).mask; } Self { min, max, mask } } /// The range constraint of the product of two expressions: /// If `r1` is a valid RangeConstraint for `e1` and `r2` is a valid RangeConstraint for `e2`, /// then `r1.combine_product(r2)` is a valid RangeConstraint for `e1 * e2`. 
pub fn combine_product(&self, other: &Self) -> Self { if let Some(v) = other.try_to_single_value() { self.multiple(v) } else if let Some(v) = self.try_to_single_value() { other.multiple(v) } else if self.min <= self.max && other.min <= other.max && self.max.to_arbitrary_integer() * other.max.to_arbitrary_integer() < T::modulus().to_arbitrary_integer() { Self::from_range(self.min * other.min, self.max * other.max) } else { Self::unconstrained() } } /// If `Self` is a valid range constraint on an expression `e`, returns /// a valid range constraint for `e * e`. pub fn square(&self) -> Self { if self.min > self.max { // If we have "negative" values, make sure that the square // is non-negative. let max_abs = std::cmp::max(-self.min, self.max); if max_abs.to_arbitrary_integer() * max_abs.to_arbitrary_integer() < T::modulus().to_arbitrary_integer() { return Self::from_range(T::zero(), max_abs * max_abs); } } self.combine_product(self) } /// Returns the conjunction of this constraint and the other. /// This operation is not lossless, but if `r1` and `r2` allow /// a value `x`, then `r1.conjunction(r2)` also allows `x`. /// Furthermore, if `r1` and `r2` are valid RangeConstraints for /// the same expression `e`, then `r1.conjunction(r2)` is also a valid /// RangeConstraint for `e`. pub fn conjunction(&self, other: &Self) -> Self { let mut mask = self.mask & other.mask; // We might lose information because the intersection of two potentially wrapping // intervals can be more than one (potentially wrapping) intervals. let (mut min, mut max) = interval_intersection((self.min, self.max), (other.min, other.max)) .unwrap_or((0.into(), 0.into())); // Now try to derive better values for the mask from the new range // and vice-versa. if mask < T::modulus() { if min <= max { // If we adjust both min and max, the right way could be // to have an empty range. On the other hand, this should not // be incorrect. 
min = cmp::min(mask.into(), min); max = cmp::min(mask.into(), max); } else if min.to_integer() > mask { min = T::zero(); max = cmp::min(mask.into(), max); } else { // max < min <= mask // the proper intersection here cannot always be represented by // a single interval. Let's just leave it as it is. } } if min <= max { mask &= Self::from_range(min, max).mask; } Self { min, max, mask } } /// Returns the disjunction of this constraint and the other. /// This operation is not lossless, but if `r1` or `r2` allow /// a value `x`, then `r1.disjunction(r2)` also allows `x`. /// Furthermore, if `r1` OR `r2` is a valid RangeConstraint for /// the same expression `e`, then `r1.disjunction(r2)` is a valid /// RangeConstraint for `e`. pub fn disjunction(&self, other: &Self) -> Self { let mask = self.mask | other.mask; match (self.min <= self.max, other.min <= other.max) { (true, true) => Self { min: cmp::min(self.min, other.min), max: cmp::max(self.max, other.max), mask, }, (true, false) | (false, true) => { // These cases are too complicated - we could refine them in the future. Self::from_mask(mask) } (false, false) => { let min = cmp::min(self.min, other.min); let max = cmp::max(self.max, other.max); if min <= max { // The ranges cover the full field. Self::from_mask(mask) } else { Self { min, max, mask } } } } } /// The constraint of an integer multiple of an expression. /// If `r` is a valid RangeConstraint for `e`, then `r.multiple(factor)` /// is a valid RangeConstraint for `factor * e`. 
pub fn multiple(&self, factor: T) -> Self { let mask = log2_exact(factor.to_arbitrary_integer()).and_then(|exponent| { (self.mask.to_arbitrary_integer() << exponent < T::modulus().to_arbitrary_integer()) .then(|| self.mask << exponent) }); let (min, max) = if factor.is_in_lower_half() { range_multiple(self.min, self.max, factor) } else { range_multiple(-self.max, -self.min, -factor) }; Self { min, max, mask: mask.unwrap_or_else(|| Self::from_range(min, max).mask), } } /// If only a single value satisfies this condition, returns this value. pub fn try_to_single_value(&self) -> Option { if self.min == self.max && self.min.to_integer() & self.mask == self.min.to_integer() { Some(self.min) } else { None } } /// If this function returns true, then no value can satisfy both range constraints at the same time. /// If it returns false, this might also be the case, but we cannot be sure. pub fn is_disjoint(&self, other: &RangeConstraint) -> bool { // True if the intersection allows zero. let zero_allowed = self.allows_value(T::zero()) && other.allows_value(T::zero()); // True if the intersection is empty when looking at the masks (and zero) only. let masks_disjoint = !zero_allowed && (self.mask & other.mask).is_zero(); // True if the intersection is empty when looking at ranges only. let intervals_disjoint = interval_intersection((self.min, self.max), (other.min, other.max)).is_none(); masks_disjoint || intervals_disjoint } /// Returns the allowed values of this range constraint. /// Panics if the range width is larger than 2^32 (in which case you /// probably don't want to call this function). pub fn allowed_values(&self) -> impl Iterator + '_ { (0..self.range_width().try_into_u32().unwrap()) .map(move |offset| self.min + T::from(offset)) .filter(|value| self.allows_value(*value)) } } impl Default for RangeConstraint { fn default() -> Self { Self::unconstrained() } } /// The number of elements in an (inclusive) min/max range. 
/// Works both if min is smaller than max and if it is larger (the inverted interval).
fn range_width(min: T, max: T) -> T::Integer {
    if max + T::one() == min {
        // The interval wraps all the way around: it covers the whole field.
        T::modulus()
    } else {
        (max - min + T::one()).to_integer()
    }
}

/// Returns a mask with the lowest `bits` bits set.
#[inline]
fn mask_from_bits(bits: usize) -> T::Integer {
    if bits == 0 {
        T::Integer::zero()
    } else {
        let max = !T::Integer::zero();
        let max_bits = T::Integer::NUM_BITS;
        assert!(bits <= max_bits);
        max >> (max_bits - bits)
    }
}

/// If an expression `x` is in the range `[min, max]`, returns
/// a range `[min', max']` such that `factor * x` is in that range.
///
/// Inverted ranges are possible for both the input and the output.
fn range_multiple(min: T, max: T, factor: T) -> (T, T) {
    // This is correct by iterated addition.
    if range_width(min, max).to_arbitrary_integer() * factor.to_arbitrary_integer()
        <= T::modulus().to_arbitrary_integer()
    {
        (min * factor, max * factor)
    } else {
        // The range that allows all values
        (T::one(), T::zero())
    }
}

/// Computes the intersection of two intervals.
/// There are cases where the intersection cannot be represented as a single interval.
/// In that case, it returns the smaller of the two inputs (which is a correct
/// range constraint in the sense that they can always be under-approximations,
/// but it loses some information).
/// If the intersection is empty, returns None.
fn interval_intersection(a: (T, T), b: (T, T)) -> Option<(T, T)> {
    // We shift both intervals until they are both non-wrapping intervals.
    // If we do not succeed after shifting both of them by the smallest amount,
    // it means that the intersection cannot be expressed as a single interval.
    // In that case we just choose the smaller of the two inputs.
    match [a.0, b.0].into_iter().find_map(|shift| {
        let a_shifted = shifted_interval(a, -shift);
        let b_shifted = shifted_interval(b, -shift);
        (a_shifted.0 <= a_shifted.1 && b_shifted.0 <= b_shifted.1)
            .then_some((shift, (a_shifted, b_shifted)))
    }) {
        Some((shift, (a_shifted, b_shifted))) => {
            let intersection = (
                cmp::max(a_shifted.0, b_shifted.0),
                cmp::min(a_shifted.1, b_shifted.1),
            );
            // If min is larger than max, the intersection is empty.
            (intersection.0 <= intersection.1).then_some(shifted_interval(intersection, shift))
        }
        None => {
            // The intersection consists of two intervals. We cannot represent that,
            // so we return the smaller of the input intervals.
            if range_width(a.0, a.1) <= range_width(b.0, b.1) {
                Some(a)
            } else {
                Some(b)
            }
        }
    }
}

/// Shifts both interval endpoints by `shift` (wrapping in the field).
fn shifted_interval((min, max): (T, T), shift: T) -> (T, T) {
    (min + shift, max + shift)
}

impl ops::Neg for RangeConstraint {
    type Output = Self;

    fn neg(self) -> Self::Output {
        // Negation mirrors the interval: [min, max] becomes [-max, -min].
        let (min, max) = self.range();
        Self::from_range(-max, -min)
    }
}

impl Display for RangeConstraint {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "[{}, {}] & 0x{:x}",
            format_negated(self.min),
            format_negated(self.max),
            self.mask()
        )
    }
}

/// Formats upper-half field elements as negative numbers for readability.
fn format_negated(value: T) -> String {
    if value.is_in_lower_half() {
        value.to_string()
    } else {
        format!("-{}", -value)
    }
}

#[cfg(test)]
mod test {
    use itertools::Itertools;
    use powdr_number::{BabyBearField, GoldilocksField};
    use pretty_assertions::assert_eq;

    use super::*;

    // Shorthand for a range constraint over the Goldilocks field.
    type RCg = RangeConstraint;

    #[test]
    fn from_max_bit() {
        assert_eq!(*RCg::from_max_bit(0).mask(), 1u64.into());
        assert_eq!(*RCg::from_max_bit(1).mask(), 3u64.into());
        assert_eq!(*RCg::from_max_bit(63).mask(), (u64::MAX).into());
    }

    #[test]
    fn from_value() {
        assert_eq!(
            RCg::from_value(9.into()),
            RCg {
                min: 9.into(),
                max: 9.into(),
                mask: 9u32.into()
            }
        );
    }

    #[test]
    fn from_range() {
        assert_eq!(
            RCg::from_range(3.into(), 9.into()),
            RCg {
                min: 3.into(),
                max: 9.into(),
                mask: 15u32.into()
            }
        );
        assert_eq!(
            RCg::from_range(9.into(), 3.into()),
RCg {
                min: 9.into(),
                max: 3.into(),
                mask: u64::MAX.into()
            }
        );
    }

    #[test]
    fn range_width() {
        assert_eq!(RCg::from_value(7.into()).range_width(), 1u32.into());
        assert_eq!(
            RCg::from_range(3.into(), 7.into()).range_width(),
            5u32.into()
        );
        assert_eq!(
            RCg::from_range(8.into(), 2.into()).range_width(),
            // This is the range above, just inverted.
            // So we should have the whole field minus five.
            GoldilocksField::from(-5).to_integer()
        );
        assert_eq!(
            RCg::from_mask(0xf00fu32).range_width(),
            (0xf00fu32 + 1).into()
        );
    }

    #[test]
    fn combine_sum() {
        assert_eq!(
            RCg::from_range(3.into(), 7.into())
                .combine_sum(&RCg::from_range(15.into(), 300.into())),
            RCg {
                min: 18.into(),
                max: 307.into(),
                mask: 511u32.into()
            }
        );
        assert_eq!(
            RCg::from_mask(0x1100u32).combine_sum(&RCg::from_mask(0xffu32)),
            RCg {
                min: 0.into(),
                max: 0x11ffu32.into(),
                mask: 0x11ffu32.into()
            }
        );
        assert_eq!(
            RCg::from_mask(0x1110u32).combine_sum(&RCg::from_mask(0xffu32)),
            RCg {
                min: 0.into(),
                max: 0x120fu32.into(),
                mask: 0x13ffu32.into()
            }
        );
        // Test overflow of masks. Modulus is: 0xffffffff00000001
        assert!(RCg::from_mask(0xefffffff00000001u64)
            .combine_sum(&RCg::from_mask(0x7ffffffff0000000u64))
            .is_unconstrained());
    }

    #[test]
    fn combine_sum_around_modulus() {
        let modulus = 0xffffffff00000001u64;
        // Test min-max range width around modulus
        let half_modulus_range = RCg::from_range(7.into(), (modulus / 2 + 6).into());
        assert_eq!(
            half_modulus_range.range_width() + half_modulus_range.range_width() + 1u32.into(),
            modulus.into(),
        );
        // Sum of range widths is one less than modulus.
        assert_eq!(
            half_modulus_range.combine_sum(&half_modulus_range),
            RCg {
                min: 14.into(),
                max: 11.into(), // (modulus - 1) / 2 * 2 + 12 - modulus = 11
                mask: u64::MAX.into(),
            }
        );
        // Sum of range widths is equal to modulus.
        let two_range = RCg::from_range(50.into(), 51.into());
        let half_modulus_plus_one_range = half_modulus_range.combine_sum(&two_range);
        assert_eq!(
            half_modulus_range.range_width() + half_modulus_plus_one_range.range_width(),
            modulus.into(),
        );
        assert_eq!(
            half_modulus_range.combine_sum(&half_modulus_plus_one_range),
            RCg {
                min: 64.into(),
                max: 62.into(),
                mask: u64::MAX.into(),
            }
        );
        // Sum of range widths is larger than modulus.
        let two_range = RCg::from_range(50.into(), 51.into());
        let half_modulus_plus_one_range = half_modulus_range.combine_sum(&two_range);
        assert!(half_modulus_range
            .combine_sum(&half_modulus_plus_one_range.combine_sum(&two_range))
            .is_unconstrained());
    }

    #[test]
    fn mul_add() {
        let a = RangeConstraint::::from_mask(0x1u32);
        let b = RangeConstraint::from_mask(0xffu32);
        let c = a.multiple(512.into()).combine_sum(&b);
        assert_eq!(c, RangeConstraint::from_mask(0x2ff_u32));
        let d = a.multiple(-GoldilocksField::from(1)).combine_sum(&b);
        assert_eq!(
            d,
            RangeConstraint::from_range(-GoldilocksField::from(1), 0xff.into())
        );
    }

    #[test]
    fn multiple_negative() {
        let a: RangeConstraint = RangeConstraint::from_range(0.into(), 12.into());
        assert_eq!(*a.mask(), 0xfu32.into());
        let b = a.multiple((-3).into());
        assert_eq!(*b.mask(), u64::MAX.into());
        assert_eq!(b.range(), (-GoldilocksField::from(36), 0.into()));
    }

    #[test]
    fn multiple_overflow() {
        let modulus = 0xffffffff00000001u64;
        // Test min-max range width around modulus
        let max_value = (modulus / 4 + 6).into();
        let a = RCg::from_range(7.into(), max_value);
        assert!(
            a.range_width().to_arbitrary_integer()
                * GoldilocksField::from(4u32).to_arbitrary_integer()
                <= GoldilocksField::modulus().to_arbitrary_integer()
        );
        assert!(
            a.range_width().to_arbitrary_integer()
                * GoldilocksField::from(5u32).to_arbitrary_integer()
                > GoldilocksField::modulus().to_arbitrary_integer()
        );
        assert_eq!(
            a.multiple(4.into()),
            RangeConstraint {
                min: 28.into(),
                max: max_value * GoldilocksField::from(4),
                mask: u64::MAX.into()
            }
        );
        assert_eq!(
a.multiple(5.into()),
            RangeConstraint {
                min: 1.into(),
                max: 0.into(),
                mask: u64::MAX.into()
            }
        );
    }

    #[test]
    fn combinations() {
        let a: RangeConstraint = RangeConstraint::from_max_bit(7);
        assert_eq!(a, RangeConstraint::from_mask(0xff_u32));
        let b = a.multiple(256.into());
        assert_eq!(b, RangeConstraint::from_mask(0xff00_u32));
        assert_eq!(b.combine_sum(&a), RangeConstraint::from_mask(0xffff_u32));
    }

    #[test]
    fn weird_combinations() {
        let a: RangeConstraint = RangeConstraint::from_mask(0xf00f_u32);
        let b = a.multiple(256.into());
        assert_eq!(b, RangeConstraint::from_mask(0xf00f00_u32));
        assert_eq!(b.combine_sum(&a), RangeConstraint::from_mask(0xf0ff0f_u32));
    }

    #[test]
    fn interval_intersections() {
        type F = GoldilocksField;
        // Asserts that the intersection is symmetric in its arguments and
        // returns the common result.
        fn commutativity_test(a: (F, F), b: (F, F)) -> Option<(F, F)> {
            let direct = interval_intersection(a, b);
            let inverse = interval_intersection(b, a);
            assert_eq!(direct, inverse);
            direct
        }
        // Plain, no wrapping:
        // a is contained in b
        {
            let a = (50.into(), 60.into());
            assert_eq!(commutativity_test(a, (10.into(), 100.into())), Some(a));
        }
        // a has an intersection with b
        assert_eq!(
            commutativity_test((10.into(), 60.into()), (40.into(), 100.into())),
            Some((40.into(), 60.into()))
        );
        // a and b does not intersect
        assert_eq!(
            commutativity_test((10.into(), 40.into()), (60.into(), 100.into())),
            None
        );
        // Wrapping intervals:
        // a intersects with b both at the beginning and at the end
        // (should return the smallest of the two ranges)
        {
            let a = (10.into(), 100.into());
            assert_eq!(commutativity_test(a, (90.into(), 20.into())), Some(a));
        }
        // a intersects with the beginning of b, and almost intersects with the end
        assert_eq!(
            commutativity_test((21.into(), 100.into()), (90.into(), 20.into())),
            Some((90.into(), 100.into()))
        );
        // a intersects with the end of b, and almost intersects with the beginning
        assert_eq!(
            commutativity_test((10.into(), 89.into()), (90.into(), 20.into())),
            Some((10.into(), 20.into()))
        );
        // an intersection that contains zero
        assert_eq!(
            commutativity_test((F::from(-50), 10.into()), (F::from(-10), 50.into())),
            Some((F::from(-10), 10.into()))
        );
        // a intersects with b right before zero
        assert_eq!(
            commutativity_test((F::from(-50), F::from(-10)), (F::from(-20), 20.into())),
            Some((F::from(-20), F::from(-10)))
        );
        // a intersects with b right after zero
        assert_eq!(
            commutativity_test((10.into(), 50.into()), (F::from(-20), 20.into())),
            Some((10.into(), 20.into()))
        );
        // a is contained in b, both contains 0
        {
            let a = (F::from(-20), 20.into());
            assert_eq!(commutativity_test(a, (F::from(-50), 90.into())), Some(a));
        }
        // a is contained in b before 0
        {
            let a = (F::from(-20), F::from(-10));
            assert_eq!(commutativity_test(a, (F::from(-50), 90.into())), Some(a));
        }
        // a is contained in b after 0
        {
            let a = (10.into(), 20.into());
            assert_eq!(commutativity_test(a, (F::from(-50), 90.into())), Some(a));
        }
    }

    #[test]
    fn allows_value() {
        type F = GoldilocksField;
        // Inverted (wrapping) range: allows only values outside (10, 20).
        let a = RangeConstraint::::from_range(20.into(), 10.into());
        assert!(a.allows_value(5.into()));
        assert!(a.allows_value(10.into()));
        assert!(!a.allows_value(15.into()));
        assert!(a.allows_value(20.into()));
        assert!(a.allows_value(25.into()));
        let b = RangeConstraint::::from_range(10.into(), 20.into());
        assert!(!b.allows_value(5.into()));
        assert!(b.allows_value(10.into()));
        assert!(b.allows_value(15.into()));
        assert!(b.allows_value(20.into()));
        assert!(!b.allows_value(25.into()));
    }

    #[test]
    fn conjunction() {
        // This mostly tests the refinement of the bounds from min-max to mask and vice-versa.
type F = GoldilocksField;
        let x = RangeConstraint::::from_range(100000.into(), 70.into())
            .conjunction(&RangeConstraint::from_mask(0xfffu32));
        assert_eq!(
            x,
            RangeConstraint {
                min: 0.into(),
                max: 70.into(),
                mask: 127u32.into(), // This mask is refined from the max value
            },
        );
        let y = RangeConstraint::::from_mask(0xfff000u32)
            .conjunction(&RangeConstraint::from_mask(0xff00u32));
        assert_eq!(
            y,
            RangeConstraint {
                min: 0.into(),
                max: 0xf000u32.into(), // this max value is derived from the mask.
                mask: 0xf000u32.into(),
            },
        );
    }

    #[test]
    fn disjunction() {
        type F = GoldilocksField;
        // Two wrapping ranges; their disjunction must allow everything
        // either of them allows.
        let a = RangeConstraint::::from_range(20.into(), 10.into());
        let b = RangeConstraint::::from_range(30.into(), 15.into());
        let d = a.disjunction(&b);
        assert!(d.allows_value(5.into()));
        assert!(d.allows_value(10.into()));
        assert!(d.allows_value(15.into()));
        assert!(!d.allows_value(18.into()));
        assert!(d.allows_value(20.into()));
        assert!(d.allows_value(25.into()));
    }

    #[test]
    fn disjunction_combinations() {
        type F = GoldilocksField;
        // Exhaustively combine range endpoints and probe values around them.
        let lower = [10, 10000, 100060];
        let upper = [20, 10006, 100070];
        let test = [
            5, 10, 15, 20, 900, 10000, 10004, 10006, 10010, 100055, 100060, 100065, 100070,
            100075,
        ]
        .iter()
        .map(|t| F::from(*t))
        .collect_vec();
        for (l1, u1) in lower.iter().cartesian_product(upper.iter()) {
            for (l2, u2) in lower.iter().cartesian_product(upper.iter()) {
                let a = RangeConstraint::::from_range((*l1).into(), (*u1).into());
                let b = RangeConstraint::::from_range((*l2).into(), (*u2).into());
                let c = a.disjunction(&b);
                for t in &test {
                    // Range constraints are allowed to be less strict, so we can only test one direction.
if !c.allows_value(*t) {
                        assert!(!a.allows_value(*t) || !b.allows_value(*t));
                    }
                }
            }
        }
    }

    #[test]
    fn is_disjoint() {
        type F = GoldilocksField;
        let a = RangeConstraint::::from_range(10.into(), 20.into());
        let b = RangeConstraint::::from_range(20.into(), 30.into());
        assert!(!a.is_disjoint(&b));
        let b = RangeConstraint::::from_range(21.into(), 30.into());
        assert!(a.is_disjoint(&b));
        let b = RangeConstraint::::from_range(21.into(), 9.into());
        assert!(a.is_disjoint(&b));
        let b = RangeConstraint::::from_range(21.into(), 10.into());
        assert!(!a.is_disjoint(&b));
        let b = RangeConstraint::::from_mask(0x100u32);
        assert!(b.range() == (0.into(), 0x100u32.into()));
        assert!(a.is_disjoint(&b));
        let c = RangeConstraint::::from_mask(0xffu32);
        // They are not disjoint, because they both allow zero.
        assert!(!c.is_disjoint(&b));
        let d = c.conjunction(&RangeConstraint::from_range(1.into(), 5000.into()));
        assert!(d.is_disjoint(&b));
    }

    #[test]
    fn is_unconstrained() {
        type F = BabyBearField;
        assert!(RangeConstraint::::unconstrained().is_unconstrained());
        let a = RangeConstraint::::from_range(0.into(), F::from(0) - F::from(1));
        assert!(a.is_unconstrained());
        let b = RangeConstraint::::from_range(5.into(), 4.into());
        assert!(b.is_unconstrained());
        let c = RangeConstraint::::from_mask(!F::from(0).to_integer());
        assert!(c.is_unconstrained());
        let x = RangeConstraint::::from_range(0.into(), F::from(10));
        assert!(!x.is_unconstrained());
        let y = RangeConstraint::::from_range(F::from(-1), F::from(0));
        assert!(!y.is_unconstrained());
    }
}


================================================
FILE: constraint-solver/src/reachability.rs
================================================
use std::collections::HashSet;
use std::fmt::Display;
use std::hash::Hash;

use itertools::Itertools;

use crate::indexed_constraint_system::IndexedConstraintSystem;
use crate::runtime_constant::RuntimeConstant;

/// Returns the set of all variables reachable from an initial set via shared constraints
/// (algebraic constraints and bus interactions).
/// The returned set also contains the initial variables.
pub fn reachable_variables(
    initial_variables: impl IntoIterator,
    constraint_system: &IndexedConstraintSystem,
) -> HashSet
where
    T: RuntimeConstant,
    V: Clone + Ord + Hash + Display,
{
    // The plain search is the blocked search with an empty barrier set.
    reachable_variables_except_blocked(initial_variables, std::iter::empty(), constraint_system)
}

/// Returns the set of all variables reachable from an initial set via shared constraints
/// (algebraic constraints and bus interactions).
/// The set of blocking variables is a barrier that stops the reachability search, in the
/// sense that we consider constraints that can also contain blocking variables, but we
/// only continue the search from the non-blocking variables in constraints.
/// The returned set contains reachable blocking variables and the initial variables.
pub fn reachable_variables_except_blocked(
    initial_variables: impl IntoIterator,
    blocking_variables: impl IntoIterator,
    constraint_system: &IndexedConstraintSystem,
) -> HashSet
where
    T: RuntimeConstant,
    V: Clone + Ord + Hash + Display,
{
    let mut reachable_variables = initial_variables.into_iter().collect::>();
    let blocking_variables = blocking_variables.into_iter().collect::>();
    // Fixed-point iteration: keep expanding until no new variable is added.
    loop {
        let size_before = reachable_variables.len();
        let reachable_variables_vec = reachable_variables.iter().cloned().collect_vec();
        for constraint in
            constraint_system.constraints_referencing_variables(&reachable_variables_vec)
        {
            // Only expand through non-blocking reachable variables; blocking
            // variables may appear in the constraint but do not propagate.
            if constraint
                .referenced_unknown_variables()
                .any(|var| reachable_variables.contains(var) && !blocking_variables.contains(var))
            {
                // This constraint is connected to a reachable variable,
                // add all variables of this constraint.
reachable_variables.extend(constraint.referenced_unknown_variables().cloned());
            }
        }
        if reachable_variables.len() == size_before {
            // Fixed point reached: no new variables were discovered.
            break;
        }
    }
    reachable_variables
}


================================================
FILE: constraint-solver/src/rule_based_optimizer/driver.rs
================================================
use std::collections::{HashMap, HashSet};
use std::fmt::Display;
use std::hash::Hash;

use itertools::Itertools;
use powdr_number::FieldElement;

use crate::range_constraint::RangeConstraint;
use crate::rule_based_optimizer::new_var_generator::NewVarRequest;
use crate::{
    algebraic_constraint::AlgebraicConstraint,
    constraint_system::{
        BusInteraction, BusInteractionHandler, ComputationMethod, ConstraintSystem,
        DerivedVariable,
    },
    grouped_expression::{GroupedExpression, RangeConstraintProvider},
    indexed_constraint_system::IndexedConstraintSystem,
    inliner::DegreeBound,
    rule_based_optimizer::{
        environment::Environment,
        item_db::ItemDB,
        new_var_generator::NewVarGenerator,
        rules,
        types::{Action, Expr, Var},
    },
    runtime_constant::VarTransformable,
};

/// A single substitution made during optimization: variable and its replacement.
pub type VariableAssignment = (V, GroupedExpression);

/// Perform rule-based optimization on the given constraint system. Returns the modified
/// system and a list of variable assignments that were made during the optimization.
/// The rules can also alter algebraic constraints and bus interactions, those alterations
/// will not be visible in the list of substitutions.
///
/// If a degree bound is NOT given, then the degrees of the returned system will not increase.
/// If it is given, then the degrees may increase, but will stay within the bound.
///
/// The function `new_var` can be used to generate a fresh variable, each call should
/// return a fresh variable and the parameter can be used as a name suggestion.
pub fn rule_based_optimization(
    mut system: IndexedConstraintSystem,
    range_constraints: impl RangeConstraintProvider,
    bus_interaction_handler: impl BusInteractionHandler + Clone,
    new_var: &mut impl FnMut(&str) -> V,
    degree_bound: Option,
) -> (IndexedConstraintSystem, Vec>) {
    let mut assignments = vec![];
    let mut var_mapper = system
        .referenced_unknown_variables()
        .cloned()
        // Sorting is important here so that the order for V translates
        // to the same order on Var.
        .sorted()
        .collect::>();
    // The expression database will be used to map expressions and their IDs.
    // New expressions are created during rule execution and thus new IDs need
    // to be allocated. Because of lifetime issues, we pass it into
    // `env` and extract it again after the rules have run.
    let mut expr_db = Some(ItemDB::, Expr>::default());
    let mut range_constraints_on_vars: HashMap> = system
        .referenced_unknown_variables()
        .map(|v| (var_mapper.id(v), range_constraints.get(v)))
        .filter(|(_, rc)| !rc.is_unconstrained())
        .collect();
    // Main loop: run the rule engine, apply the resulting actions, and
    // repeat until a pass makes no progress.
    loop {
        // Transform the constraint system into a simpler representation
        // using IDs for variables and expressions.
        let (algebraic_constraints, bus_interactions) =
            transform_constraint_system(&system, &var_mapper, expr_db.as_mut().unwrap());
        let duplicate_vars = system
            .referenced_unknown_variables()
            .map(|v| var_mapper.id(v))
            .duplicates()
            .collect::>();
        let single_occurrence_vars = system
            .referenced_unknown_variables()
            .map(|v| var_mapper.id(v))
            .collect::>()
            .difference(&duplicate_vars)
            .copied()
            .collect::>();
        // Create the "environment" singleton that can be used by the rules
        // to query information from the outside world.
        let env = Environment::::new(
            expr_db.take().unwrap(),
            var_mapper
                .iter()
                .map(|(id, var)| (id, var.to_string()))
                .collect(),
            single_occurrence_vars,
            // The NewVarGenerator will be used to generate fresh variables.
            // because of lifetime and determinism issues, we pass the next ID that
            // the var_mapper would use here and then re-create the
            // variables in a deterministic sequence further down.
            NewVarGenerator::new(var_mapper.next_free_id()),
        );
        // Create the rule system and populate it with the initial facts.
        let mut rt = rules::Crepe::default();
        // It would be better to handle bus interactions inside the rule system,
        // but it is difficult because of the vector and the combinatorial
        // explosion of the range constraints, so we just determine the range constraints
        // on the bus interaction fields now.
        rt.extend(
            system
                .bus_interactions()
                .iter()
                .zip(bus_interactions)
                .flat_map(|(bus_inter, bus_inter_transformed)| {
                    let updated_rcs = bus_interaction_handler
                        .handle_bus_interaction(bus_inter.to_range_constraints(&range_constraints))
                        .fields()
                        .cloned()
                        .collect_vec();
                    bus_inter_transformed
                        .fields()
                        .cloned()
                        .zip(updated_rcs)
                        .collect_vec()
                })
                .filter(|(_, rc)| !rc.is_unconstrained())
                // The same expression can occur in several fields: combine
                // all constraints on it via conjunction.
                .into_grouping_map()
                .reduce(|rc1, _, rc2| rc1.conjunction(&rc2))
                .into_iter()
                .map(|(e, rc)| rules::InitialRangeConstraintOnExpression(e, rc)),
        );
        rt.extend(
            range_constraints_on_vars
                .iter()
                .map(|(var, rc)| rules::RangeConstraintOnVar(*var, *rc)),
        );
        rt.extend(
            algebraic_constraints
                .iter()
                .copied()
                .map(rules::InitialAlgebraicConstraint),
        );
        rt.extend(std::iter::once(rules::Env(&env)));
        // Uncomment this to get a runtime profile of the individual
        // rules.
        // let ((actions, large_actions), profile) = rt.run_with_profiling();
        // profile.report();
        let (actions, large_actions) = rt.run();
        let (expr_db_, new_var_generator) = env.terminate();
        let mut progress = false;
        // Try to execute the actions that were determined by the rules.
        // Since the rules are "non-deterministic", some actions might conflict
        // (imagine x := 7, x := y and y := 7, they are all consistent but
        // some will fail depending on the order in which they are applied).
// We try to ensure that at least the outcome is deterministic by
        // sorting the actions.

        // Collect replacement actions to process them in batch
        let mut replacement_actions = Vec::new();
        // Data structure to determine and record the final deterministic IDs of new variables
        let mut new_vars = new_var_generator.requests();
        for action in actions.into_iter().map(|a| a.0).sorted() {
            match action {
                Action::UpdateRangeConstraintOnVar(var, rc) => {
                    let existing_rc = range_constraints_on_vars
                        .get(&var)
                        .cloned()
                        .unwrap_or_default();
                    let new_rc = existing_rc.conjunction(&rc);
                    if new_rc != existing_rc {
                        if let Some(val) = new_rc.try_to_single_value() {
                            // The refined constraint pins the variable to a
                            // single value: substitute and record the assignment.
                            system.substitute_by_known(&var_mapper[var], &val);
                            assignments.push((
                                var_mapper[var].clone(),
                                GroupedExpression::from_number(val),
                            ));
                        } else {
                            range_constraints_on_vars.insert(var, new_rc);
                        }
                        progress = true;
                    }
                }
                Action::SubstituteVariableByConstant(var, val) => {
                    system.substitute_by_known(&var_mapper[var], &val);
                    assignments
                        .push((var_mapper[var].clone(), GroupedExpression::from_number(val)));
                    progress = true;
                }
                Action::SubstituteVariableByVariable(v1, v2) => {
                    assignments.push((
                        var_mapper[v1].clone(),
                        GroupedExpression::from_unknown_variable(var_mapper[v2].clone()),
                    ));
                    system.substitute_by_unknown(
                        &var_mapper[v1],
                        &GroupedExpression::from_unknown_variable(var_mapper[v2].clone()),
                    );
                    progress = true;
                }
                Action::ReplaceAlgebraicConstraintBy(e1, replacement) => {
                    replacement_actions.push(ReplacementAction::new(
                        [e1],
                        [replacement],
                        &mut |e| {
                            undo_variable_transform_and_recreate_new_variables(
                                &expr_db_[e],
                                &mut var_mapper,
                                &mut new_vars,
                                &mut system,
                                new_var,
                            )
                        },
                    ));
                }
            }
        }
        for action in large_actions.into_iter().map(|a| a.0).sorted() {
            replacement_actions.push(ReplacementAction::new(
                action.to_replace.iter().flatten().copied(),
                action.replace_by.iter().flatten().copied(),
                &mut |e| {
                    undo_variable_transform_and_recreate_new_variables(
                        &expr_db_[e],
                        &mut var_mapper,
                        &mut new_vars,
                        &mut system,
                        new_var,
                    )
                },
            ));
        }
        replacement_actions.sort();
        progress |=
            batch_replace_algebraic_constraints(&mut system, replacement_actions, degree_bound);
        if !progress {
            break;
        }
        // Put the expression database back for the next iteration.
        expr_db = Some(expr_db_);
    }
    system.retain_algebraic_constraints(|c| !c.is_redundant());
    (system, assignments)
}

/// Mainly transforms a `GroupedExpression` back into a `GroupedExpression`, but also re-creates
/// any variables that were newly generated inside the expression and adds potential computation methods
/// to the constraint system.
/// This is needed in order to ensure a deterministic creation order for new variables.
fn undo_variable_transform_and_recreate_new_variables<
    T: FieldElement,
    V: Hash + Eq + Ord + Clone + Display,
>(
    expr: &GroupedExpression,
    var_mapper: &mut ItemDB,
    new_vars: &mut HashMap>,
    system: &mut IndexedConstraintSystem,
    new_var_callback: &mut impl FnMut(&str) -> V,
) -> GroupedExpression {
    expr.transform_var_type(&mut |v| {
        let v = if let Some(request) = &mut new_vars.get_mut(v) {
            if request.final_id.is_none() {
                // We have not assigned a final ID yet, request a new variable from the global
                // callback and insert it into the variable ID database to get a new ID.
                let v = new_var_callback(&request.prefix);
                request.final_id = Some(var_mapper.insert(&v));
                let computation_method = undo_variable_transform_in_computation_method(
                    &request.computation_method,
                    var_mapper,
                );
                // Register how the new variable is computed.
                system.extend(ConstraintSystem {
                    derived_variables: vec![DerivedVariable {
                        variable: v.clone(),
                        computation_method,
                    }],
                    ..Default::default()
                });
            }
            request.final_id.unwrap()
        } else {
            *v
        };
        var_mapper[v].clone()
    })
}

/// A single replacement operation: replace `replace` constraints with `replace_by` constraints.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct ReplacementAction {
    /// Constraints to be replaced.
    pub(crate) replace: Vec>,
    /// Replacement constraints.
    pub(crate) replace_by: Vec>,
}

impl ReplacementAction {
    /// Creates a new ReplacementAction from expression IDs, performing variable transformation.
fn new(
        replace: impl IntoIterator,
        replace_by: impl IntoIterator,
        mut transform: &mut impl FnMut(Expr) -> GroupedExpression,
    ) -> Self {
        let replace = replace.into_iter().map(&mut transform).collect_vec();
        let replace_by = replace_by.into_iter().map(&mut transform).collect_vec();
        Self {
            replace,
            replace_by,
        }
    }
}

/// Checks if a replacement action satisfies the degree bound constraints.
/// Returns true if the replacement is allowed, false otherwise.
///
/// If degree_bound is None, the replacement is only allowed if the degree does not increase.
/// If degree_bound is Some(bound), the replacement is allowed if the new degree stays within the bound.
fn is_replacement_within_degree_bound(
    replacement: &ReplacementAction,
    degree_bound: Option,
) -> bool {
    let max_old_degree = replacement
        .replace
        .iter()
        .map(|e| e.degree())
        .max()
        .unwrap_or(0);
    let max_new_degree = replacement
        .replace_by
        .iter()
        .map(|e| e.degree())
        .max()
        .unwrap_or(0);
    // Check if the degree increase is acceptable
    let degree_increase = max_new_degree > max_old_degree;
    match degree_bound {
        None => !degree_increase,
        Some(bound) => max_new_degree <= bound.identities,
    }
}

/// Batch replaces multiple sets of algebraic constraints in a single pass through the constraint system.
/// Returns true if at least one replacement was successful.
///
/// If degree_bound is None, replacements are only done if the degree does not increase.
/// If degree_bound is Some(bound), replacements are only done if the degree stays within the bound.
///
/// Consults the `new_var_generator` and re-assigns the IDs of all generated variables such that they
/// are deterministically generated.
pub(crate) fn batch_replace_algebraic_constraints<
    T: FieldElement,
    V: Hash + Eq + Ord + Clone + Display,
>(
    system: &mut IndexedConstraintSystem,
    replacements: Vec>,
    degree_bound: Option,
) -> bool {
    // Filter out replacements that violate degree bounds
    // and also filter out duplicate left hand sides.
    let valid_replacements: Vec<_> = replacements
        .into_iter()
        .filter(|replacement| {
            let within_bound = is_replacement_within_degree_bound(replacement, degree_bound);
            if !within_bound {
                log::debug!(
                    "Skipping replacement of {} by {} due to degree constraints.",
                    replacement.replace.iter().format(", "),
                    replacement.replace_by.iter().format(", ")
                );
            }
            within_bound
        })
        .map(|replacement| ReplacementAction {
            replace: replacement.replace.into_iter().unique().collect(),
            replace_by: replacement.replace_by,
        })
        .collect();
    // Build a map from constraints to search for to their index in the replacement list.
    // Note that the same expression can be present in multiple lists!
    let replace_to_index: HashMap<&GroupedExpression, Vec> = valid_replacements
        .iter()
        .enumerate()
        .flat_map(|(i, r)| r.replace.iter().map(move |e| (e, i)))
        .into_group_map();
    // Compute which of the expressions to search for have been found for each replacement action.
    let mut replacement_found: Vec>> =
        vec![Default::default(); valid_replacements.len()];
    for constraint in system.algebraic_constraints() {
        if let Some(replacement_indices) = replace_to_index.get(&constraint.expression) {
            for &i in replacement_indices {
                replacement_found[i].insert(&constraint.expression);
            }
        }
    }
    let mut constraints_to_remove: HashSet<&GroupedExpression> = HashSet::new();
    let mut replacement_constraints = Vec::new();
    for (index, replacement) in valid_replacements.iter().enumerate() {
        // Only apply a replacement if all of its left-hand-side constraints
        // are actually present in the system.
        if replacement_found[index].len() != replacement.replace.len() {
            log::debug!(
                "Incomplete replacement: wanted to replace {} but found only {}/{} constraints in the system.",
                replacement.replace.iter().format(", "),
                replacement_found[index].len(),
                replacement.replace.len()
            );
            continue;
        }
        // Check if any of this replacement's constraints to replace have already been claimed
        let has_conflict = replacement
            .replace
            .iter()
            .any(|replace_expr| constraints_to_remove.contains(replace_expr));
        if has_conflict {
            // NOTE(review): this message string appears split mid-literal by the
            // extraction; preserved byte-for-byte since runtime strings must not change.
            log::debug!(
                "Skipping replacement of {} due to
conflict with earlier replacement.",
                replacement.replace.iter().format(", ")
            );
        } else {
            // No conflict, this replacement can proceed
            constraints_to_remove.extend(replacement.replace.iter());
            replacement_constraints.extend(replacement.replace_by.iter().cloned());
        }
    }
    if constraints_to_remove.is_empty() {
        // All replacements were skipped due to conflicts
        return false;
    }
    // Remove old constraints and add new ones
    system.retain_algebraic_constraints(|c| !constraints_to_remove.contains(&c.expression));
    system.add_algebraic_constraints(
        replacement_constraints
            .into_iter()
            .map(AlgebraicConstraint::assert_zero),
    );
    true
}

/// Transform the constraint system such that variables and expressions are
/// assigned IDs.
fn transform_constraint_system(
    system: &IndexedConstraintSystem,
    var_mapper: &ItemDB,
    expression_db: &mut ItemDB, Expr>,
) -> (Vec, Vec>) {
    let algebraic_constraints = system
        .system()
        .algebraic_constraints
        .iter()
        .map(|c| transform_variables(&c.expression, var_mapper))
        .map(|e| expression_db.insert_owned(e))
        .collect_vec();
    let bus_interactions: Vec> = system
        .system()
        .bus_interactions
        .iter()
        .map(|bus_inter| {
            bus_inter
                .fields()
                .map(|f| transform_variables(f, var_mapper))
                .map(|e| expression_db.insert_owned(e))
                .collect()
        })
        .collect_vec();
    (algebraic_constraints, bus_interactions)
}

/// Transform the variable type in the expression to use `Var` instead of `V`.
fn transform_variables(
    expr: &GroupedExpression,
    var_mapper: &ItemDB,
) -> GroupedExpression {
    expr.transform_var_type(&mut |v| var_mapper.id(v))
}

/// Undo the effect of `transform_variables`, transforming from `Var` back to `V`.
fn undo_variable_transform(
    expr: &GroupedExpression,
    var_mapper: &ItemDB,
) -> GroupedExpression {
    expr.transform_var_type(&mut |v| var_mapper[*v].clone())
}

/// Undo the effect of `transform_variables` on a computation method.
fn undo_variable_transform_in_computation_method< T: FieldElement, V: Hash + Eq + Ord + Clone + Display, >( method: &ComputationMethod>, var_mapper: &ItemDB, ) -> ComputationMethod> { match method { ComputationMethod::Constant(c) => ComputationMethod::Constant(*c), ComputationMethod::QuotientOrZero(numerator, denominator) => { ComputationMethod::QuotientOrZero( undo_variable_transform(numerator, var_mapper), undo_variable_transform(denominator, var_mapper), ) } } } ================================================ FILE: constraint-solver/src/rule_based_optimizer/environment.rs ================================================ use std::{ cell::RefCell, collections::{HashMap, HashSet}, hash::Hash, }; use itertools::{EitherOrBoth, Itertools}; use powdr_number::FieldElement; use crate::{ constraint_system::ComputationMethod, grouped_expression::GroupedExpression, rule_based_optimizer::{ item_db::ItemDB, new_var_generator::NewVarGenerator, types::{Expr, Var}, }, runtime_constant::VarTransformable, }; /// The Environment in the main method to access information about /// the constraint system. It allows rules to translate /// the opaque Expr identifiers into GroupedExpressions and perform /// actions on them. /// It is available to the rules as a singleton with interior mutability. pub struct Environment { expressions: RefCell, Expr>>, var_to_string: HashMap, /// Variables that only occurr once in the system /// (also only once in the constraint they occur in). single_occurrence_variables: HashSet, new_var_generator: RefCell>, } impl PartialEq for Environment { fn eq(&self, _other: &Self) -> bool { // Environment is a singleton. true } } impl Eq for Environment {} impl PartialOrd for Environment { fn partial_cmp(&self, other: &Self) -> Option { // Environment is a singleton. Some(self.cmp(other)) } } impl Ord for Environment { fn cmp(&self, _other: &Self) -> std::cmp::Ordering { // Environment is a singleton. 
std::cmp::Ordering::Equal } } impl Hash for Environment { fn hash(&self, state: &mut H) { // Environment is a singleton. 0.hash(state); } } impl Environment { pub fn new( expressions: ItemDB, Expr>, var_to_string: HashMap, single_occurrence_variables: HashSet, new_var_generator: NewVarGenerator, ) -> Self { Self { expressions: RefCell::new(expressions), var_to_string, single_occurrence_variables, new_var_generator: RefCell::new(new_var_generator), } } /// Re-extract re-usable components after the rules have run. pub fn terminate(self) -> (ItemDB, Expr>, NewVarGenerator) { ( self.expressions.into_inner(), self.new_var_generator.into_inner(), ) } /// Turns a GroupedExpression into the corresponding Expr, /// allocating a new ID if it is not yet present. /// Use this function when you only have a reference to the expression. pub fn insert(&self, expr: &GroupedExpression) -> Expr { self.expressions.borrow_mut().insert(expr) } /// Turns a GroupedExpression into the corresponding Expr, /// allocating a new ID if it is not yet present. /// Use this function instead of `insert` when you own the expression. #[allow(dead_code)] pub fn insert_owned(&self, expr: GroupedExpression) -> Expr { self.expressions.borrow_mut().insert_owned(expr) } /// Turns an Expr into an owned GroupedExpression. /// This is expensive since it clones the expression. 
pub fn extract(&self, expr: Expr) -> GroupedExpression { self.expressions.borrow()[expr].clone() } pub fn new_var( &self, prefix: &str, method: ComputationMethod>, ) -> Var { self.new_var_generator.borrow_mut().generate(prefix, method) } pub fn single_occurrence_variables(&self) -> impl Iterator { self.single_occurrence_variables.iter() } /// Split Expr into head and tail, i.e., expr = head + tail pub fn try_split_into_head_tail(&self, expr: Expr) -> Option<(Expr, Expr)> { let db = self.expressions.borrow(); let expr = db[expr].clone(); drop(db); let (head, tail) = expr.try_split_head_tail()?; Some((self.insert_owned(head), self.insert_owned(tail))) } #[allow(dead_code)] /// If this returns Some(n) then the expression is affine /// and contains n variables. pub fn affine_var_count(&self, expr: Expr) -> Option { let db = self.expressions.borrow(); let expr = &db[expr]; expr.is_affine().then(|| expr.linear_components().len()) } /// If this returns Some((coeff, var, offset)) then the expression is affine /// and equals `coeff * var + offset`. pub fn try_to_affine(&self, expr: Expr) -> Option<(T, Var, T)> { let db = self.expressions.borrow(); let expr = &db[expr]; if !expr.is_affine() { return None; } let (var, coeff) = expr.linear_components().exactly_one().ok()?; Some((*coeff, *var, *expr.constant_offset())) } pub fn try_to_number(&self, expr: Expr) -> Option { let db = self.expressions.borrow(); let expr = &db[expr]; expr.try_to_number() } /// Runs the function `f` on the expression identified by `expr`, /// passing `args` as additional arguments. /// This function is needed because we cannot return /// references to GroupedExpression due to the interior mutability. pub fn on_expr( &self, expr: Expr, args: Args, f: impl Fn(&GroupedExpression, Args) -> Ret, ) -> Ret { let db = self.expressions.borrow(); let expr = &db[expr]; f(expr, args) } /// If this returns Some(e1, e2) then the expression equals e1 * e2. 
pub fn try_as_single_product(&self, expr: Expr) -> Option<(Expr, Expr)> { let (l, r) = { let db = self.expressions.borrow(); let (l, r) = db[expr].try_as_single_product()?; (l.clone(), r.clone()) }; // TODO eventually, l and r are cloned. // if we change GroupedExpression to use `Expr` for the recursion, we do not // have to insert everything multiple times. Some((self.insert(&l), self.insert(&r))) } /// If this returns Some((v1, v2, factor)), then /// a is obtained from b * factor by substituting v2 by v1. pub fn differ_in_exactly_one_variable(&self, a_id: Expr, b_id: Expr) -> Option<(Var, Var, T)> { let db = self.expressions.borrow(); let a = &db[a_id]; let b = &db[b_id]; if !a.is_affine() || !b.is_affine() || a.linear_components().len() != b.linear_components().len() || a.linear_components().len() < 2 { return None; } // First find the variables, ignoring the coefficients. let (v1, v2) = a .linear_components() .merge_join_by(b.linear_components(), |(v1, _), (v2, _)| v1.cmp(v2)) .filter(|either| !matches!(either, EitherOrBoth::Both(_, _))) .collect_tuple()?; let (left_var, right_var, factor) = match (v1, v2) { (EitherOrBoth::Left((lv, lc)), EitherOrBoth::Right((rv, rc))) | (EitherOrBoth::Right((rv, rc)), EitherOrBoth::Left((lv, lc))) => { (*lv, *rv, *lc / *rc) } _ => return None, }; // Now verify that the other coefficients agree with the factor if *a.constant_offset() != *b.constant_offset() * factor { return None; } if !a .linear_components() .filter(|(v, _)| **v != left_var) .map(|(_, c)| *c) .eq(b .linear_components() .filter(|(v, _)| **v != right_var) .map(|(_, bc)| *bc * factor)) { return None; } Some((left_var, right_var, factor)) } /// Substitutes the variable `var` by the constant `value` in the expression `e` /// and returns the resulting expression. 
#[allow(dead_code)] pub fn substitute_by_known(&self, e: Expr, var: Var, value: T) -> Expr { let expr = { let db = self.expressions.borrow(); let mut expr = db[e].clone(); // expr.substitute_by_known(&var, &value); expr.substitute_simple(&var, value); expr }; self.insert_owned(expr) } /// Substitutes the variable `var` by the variable `replacement` in the expression `e` /// and returns the resulting expression. #[allow(dead_code)] pub fn substitute_by_var(&self, e: Expr, var: Var, replacement: Var) -> Expr { let expr = { let db = self.expressions.borrow(); let mut expr = db[e].clone(); expr.substitute_by_unknown( &var, &GroupedExpression::from_unknown_variable(replacement), ); expr }; self.insert_owned(expr) } #[allow(dead_code)] pub fn format_expr(&self, expr: Expr) -> String { let db = self.expressions.borrow(); db[expr] .transform_var_type(&mut |v| self.format_var(*v)) .to_string() } #[allow(dead_code)] pub fn format_var(&self, var: Var) -> String { self.var_to_string .get(&var) .cloned() .unwrap_or_else(|| var.to_string()) } } ================================================ FILE: constraint-solver/src/rule_based_optimizer/item_db.rs ================================================ use std::collections::HashMap; use std::hash::Hash; use std::ops::Index; use derivative::Derivative; /// A database of items that are assigned consecutive identifiers /// and which can translate back and forth between identifiers /// and items. 
#[derive(Derivative)] #[derivative(Default(bound = ""))] pub struct ItemDB { items: Vec, reverse: HashMap, _phantom: std::marker::PhantomData, } impl FromIterator for ItemDB where Item: Clone + Hash + Eq, { fn from_iter>(iter: T) -> Self { let items = iter.into_iter().collect::>(); let reverse = items .iter() .enumerate() .map(|(i, v)| (v.clone(), i)) .collect(); Self { items, reverse, _phantom: std::marker::PhantomData, } } } impl Index for ItemDB where Ident: Into, { type Output = Item; fn index(&self, index: Ident) -> &Self::Output { &self.items[index.into()] } } impl ItemDB where Item: Clone + Hash + Eq, Ident: From + Copy, { fn insert_owned_new(&mut self, item: Item) -> Ident { let id = self.items.len(); self.items.push(item.clone()); self.reverse.insert(item, id); Ident::from(id) } /// Inserts the item if not already present, returning its identifier. /// Use this function over `insert_owned` when you only have a /// reference to the item. pub fn insert(&mut self, item: &Item) -> Ident { if let Some(&id) = self.reverse.get(item) { Ident::from(id) } else { self.insert_owned_new(item.clone()) } } /// Inserts the item if not already present, returning its identifier. /// Use this function over `insert` when you have ownership of the item. 
pub fn insert_owned(&mut self, item: Item) -> Ident { if let Some(&id) = self.reverse.get(&item) { Ident::from(id) } else { self.insert_owned_new(item) } } pub fn id(&self, item: &Item) -> Ident { self.reverse.get(item).map(|&id| Ident::from(id)).unwrap() } pub fn iter(&self) -> impl Iterator { self.items .iter() .enumerate() .map(|(i, item)| (Ident::from(i), item)) } // TODO avoid using this (as pub) pub fn next_free_id(&self) -> usize { self.items.len() } } ================================================ FILE: constraint-solver/src/rule_based_optimizer/mod.rs ================================================ mod driver; mod environment; mod item_db; mod new_var_generator; mod rules; mod types; #[cfg(test)] mod tests; pub use driver::rule_based_optimization; pub use driver::VariableAssignment; ================================================ FILE: constraint-solver/src/rule_based_optimizer/new_var_generator.rs ================================================ use std::collections::HashMap; use crate::{ constraint_system::ComputationMethod, grouped_expression::GroupedExpression, rule_based_optimizer::types::Var, }; /// A request for a new variable from the rule system. The variable will be assigned a tentative ID and name /// generated from the prefix. Both the ID and the name will be re-generated when the replacements are processed. pub struct NewVarRequest { /// The final ID computed when the replacements are processed. pub final_id: Option, /// A prefix to be used for generating a descriptive name. pub prefix: String, /// The way to compute the variable during witness generation. 
pub computation_method: ComputationMethod>, } pub struct NewVarGenerator { counter: usize, requests: HashMap>, } impl NewVarGenerator { pub fn new(initial_counter: usize) -> Self { Self { counter: initial_counter, requests: Default::default(), } } pub fn generate( &mut self, prefix: &str, computation_method: ComputationMethod>, ) -> Var { let var = Var::from(self.counter); self.requests.insert( var, NewVarRequest { final_id: None, prefix: prefix.to_string(), computation_method, }, ); self.counter += 1; var } pub fn requests(self) -> HashMap> { self.requests } } ================================================ FILE: constraint-solver/src/rule_based_optimizer/rules.rs ================================================ #![allow(clippy::iter_over_hash_type)] // This is about a warning about interior mutability for the key // `Env`. We need it and it is probably fine. #![allow(clippy::mutable_key_type)] use crepe::crepe; use itertools::Itertools; use num_traits::One; use powdr_number::FieldElement; use crate::{ constraint_system::ComputationMethod, grouped_expression::{GroupedExpression, GroupedExpressionComponent}, range_constraint::RangeConstraint, rule_based_optimizer::{ environment::Environment, types::{Action, Expr, ReplaceConstraintsAction, Var}, }, }; // This file contains the set of datalog rules executed on the constraint system. // Facts/relations will be produced according to the rules from existing // facts until a fixed point is reached. // Facts marked by `@input` are provided as input to the rule engine, // and cannot be derived/extended by the rules. // Facts marked by `@output` are collected as output from the rules engine. // The only output is a set of Action rules to be applied to the constraint system. // Substitutions performed on constraints inside the rule system are not // automatically reflected in the constraint system to be optimized. // // The conditions of the rules are looped over / checked in the order in which they are // written. 
If all of them match, the "head" of the rule is executed and a new // fact is inserted into the database. // If non-trivial rust code is used as a condition, it is advisable to end the rule // after that condition and create a new "intermediate" fact for performance reasons. // // Since all rules are executed as long as they match, it is not possible to restrict // or somehow direct the fact derivation process. For example, if a variable replacement // is derived, new algebraic constraints will be created, but this does not mean that // the old constraints are removed. If we have a constraint that has many variables // and all of them are determined to be constant by other constraints, then the // derivation process will create all possible combinations of substitutions. // The same is true for range constraints: If we have a rule that requires a // range constraint for a variable, it will iterate over all range constraints // that have been derived for that variable over the course of executing the rules, // not just the most strict one. crepe! { @input pub struct Env<'a, T: FieldElement>(pub &'a Environment); @input pub struct InitialAlgebraicConstraint(pub Expr); @input pub struct InitialRangeConstraintOnExpression(pub Expr, pub RangeConstraint); @input pub struct RangeConstraintOnVar(pub Var, pub RangeConstraint); struct AlgebraicConstraint(Expr); AlgebraicConstraint(e) <- InitialAlgebraicConstraint(e); // This rule is important: Just because a rule "generates" an Expr it does not // mean that it automatically is an Expression. If we want to say something // about all Exprs, we have to make sure to "obtain" them from Expression. struct Expression(Expr); Expression(e) <- AlgebraicConstraint(e); Expression(e) <- InitialRangeConstraintOnExpression(e, _); // ReplaceAlgebraicConstraintBy(old_expr, new_expr) => old_expr can equivalently // be replaced by new_expr (and new_expression is in some way "simpler"). 
struct ReplaceAlgebraicConstraintBy(Expr, Expr); // ReplaceAlgebraicConstraintsBy(e1, e2) => // the system that does not have the constraints in `e1` but has // the new constraints in `e2` is equivalent. struct ReplaceAlgebraicConstraintsBy([Option; 10], [Option; 5]); //////////////////// BASIC SEMANTIC PROPERTIES OF EXRESSIONS ////////////////////// // EqualZero(e) => e = 0 for all satisfying assignments. struct EqualZero(Expr); EqualZero(e) <- AlgebraicConstraint(e); //////////////////// STRUCTURAL PROPERTIES OF EXPRESSIONS ////////////////////// // ContainsVariable(e, v) => v appears inside e. struct ContainsVariable(Expr, Var); ContainsVariable(e, v) <- Env(env), Expression(e), for v in env.on_expr(e, (), |e, _| e.referenced_unknown_variables().cloned().collect_vec()); struct Product(Expr, Expr, Expr); Product(e, l, r) <- Expression(e), Env(env), let Some((l, r)) = env.try_as_single_product(e); Product(e, r, l) <- Product(e, l, r); Expression(e) <- Product(_, e, _); Expression(e) <- Product(_, _, e); // AffineExpression(e, coeff, var, offset) => e = coeff * var + offset struct AffineExpression(Expr, T, Var, T); AffineExpression(e, coeff, var, offset) <- Expression(e), Env(env), let Some((coeff, var, offset)) = env.try_to_affine(e); struct LinearExpression(Expr, Var, T); LinearExpression(e, var, coeff) <- AffineExpression(e, coeff, var, T::zero()); struct Constant(Expr, T); Constant(e, value) <- Expression(e), Env(env), let Some(value) = env.try_to_number(e); // Split the expression into head and tail // ExpressionSumHeadTail(e, h, t) => e = h + t struct ExpressionSumHeadTail(Expr, Expr, Expr); ExpressionSumHeadTail(e, head, tail) <- Env(env), Expression(e), let Some((head, tail)) = env.try_split_into_head_tail(e); Expression(head) <- ExpressionSumHeadTail(_, head, _); Expression(tail) <- ExpressionSumHeadTail(_, _, tail); // SimpleSum(e, f, c) => e is of the form f * v_1 + f * v_2 + ... 
+ f * v_n + c, // n >= 1 struct SimpleSum(Expr, T, T); SimpleSum(e, f, c) <- ExpressionSumHeadTail(e, head, tail), SimpleSum(tail, f, c), LinearExpression(head, _, f); SimpleSum(e, f, c) <- AffineExpression(e, f, _, c); // IsAffine(e) => e is an affine expression, i.e. does not have super-linear parts. struct IsAffine(Expr); IsAffine(e) <- Constant(e, _); IsAffine(e) <- ExpressionSumHeadTail(e, head, tail), LinearExpression(head, _, _), IsAffine(tail); // HasSummand(e, summand) => summand is one of the summands of e. struct HasSummand(Expr, Expr); HasSummand(e, summand) <- ExpressionSumHeadTail(e, summand, _); HasSummand(e, summand) <- ExpressionSumHeadTail(e, _, tail), HasSummand(tail, summand); // DifferBySummand(e1, e2, s) => e1 = e2 + s and `s` is not a sum // and not a constant. // Note that `e1` and `e2` must "pre-exist" as expressions, i.e. // this rule cannot be used to split out a linear summand // from an expression but only to "compare" two expressions. struct DifferBySummand(Expr, Expr, Expr); DifferBySummand(e1, e2, s) <- ExpressionSumHeadTail(e1, s, e2); DifferBySummand(e1, e2, s) <- DifferBySummand(tail1, tail2, s), ExpressionSumHeadTail(e1, head, tail1), ExpressionSumHeadTail(e2, head, tail2); // AffinelyRelated(e1, f, e2, c) => e1 = f * e2 + c // Note this is currently only implemented for affine e1 and e2. // This only works if e1 and e2 have at least one variable // and both e1 and e2 have to "pre-exist" as expressions. // This means this rule cannot be used to subtract constants // or multiply/divide by constants alone. struct AffinelyRelated(Expr, T, Expr, T); AffinelyRelated(e1, f, e2, o1 - o2 * f) <- AffineExpression(e1, f1, v, o1), // e1 = f1 * v + o1 AffineExpression(e2, f2, v, o2), // Optimization: Compute f1 / f2 only once. 
let f = f1 / f2; // e2 = f2 * v + o2 // e1 = f1 * (e2 - o2) / f2 + o1 = e2 * (f1 / f2) + (o1 - o2 * f1 / f2) AffinelyRelated(e1, f, e2, o) <- AffinelyRelated(tail1, f, tail2, o), // The swapped case and the equal will be computed by other rules. ExpressionSumHeadTail(e1, head1, tail1), LinearExpression(head1, v, f1), ExpressionSumHeadTail(e2, head2, tail2), LinearExpression(head2, v, f1 / f); // HasProductSummand(e, l, r) => e contains a summand of the form l * r struct HasProductSummand(Expr, Expr, Expr); HasProductSummand(e, l, r) <- HasSummand(e, summand), Product(summand, l, r); HasProductSummand(e, r, l) <- HasProductSummand(e, l, r); // ProductConstraint(e, l, r) => e is an algebraic constraint of the form l * r = 0 struct ProductConstraint(Expr, Expr, Expr); ProductConstraint(e, l, r) <- AlgebraicConstraint(e), Product(e, l, r); // BooleanAndSubsetOfVars(e1, e2) => e1 and e2 are affine expressions only containing boolean variables and // all variables in e1 also appear in e2 struct BooleanAndSubsetOfVars(Expr, Expr); BooleanAndSubsetOfVars(e1, e2) <- AffineExpression(e1, _, v, _), ContainsVariable(e2, v), AffineAndAllVarsBoolean(e1), AffineAndAllVarsBoolean(e2); BooleanAndSubsetOfVars(e1, e2) <- BooleanAndSubsetOfVars(tail1, tail2), ExpressionSumHeadTail(e1, head1, tail1), LinearExpression(head1, v, _), BooleanVar(v), ExpressionSumHeadTail(e2, head2, tail2), LinearExpression(head2, v, _); BooleanAndSubsetOfVars(e1, e2) <- BooleanAndSubsetOfVars(e1, tail2), ExpressionSumHeadTail(e2, _, tail2), AffineAndAllVarsBoolean(e2); // AffineAndAllVarsBoolean(e) => e is an affine expression and all variables in e are boolean variables struct AffineAndAllVarsBoolean(Expr); AffineAndAllVarsBoolean(e) <- AffineExpression(e, _, v, _), BooleanVar(v); AffineAndAllVarsBoolean(e) <- ExpressionSumHeadTail(e, head, tail), AffineAndAllVarsBoolean(head), AffineAndAllVarsBoolean(tail); //////////////////////// RANGE CONSTRAINTS ////////////////////////// // Range constraints are 
tricky because they can easily lead to exponential behaviour. // Because of that, we should never update a range constraint on a variable // and only compute range constraints on expressions from smaller expressions. struct RangeConstraintOnExpression(Expr, RangeConstraint); RangeConstraintOnExpression(e, rc) <- InitialRangeConstraintOnExpression(e, rc); RangeConstraintOnExpression(e, rc.square()) <- Product(e, l, r), (l == r), RangeConstraintOnExpression(l, rc); RangeConstraintOnExpression(e, l_rc.combine_product(&r_rc)) <- Product(e, l, r), (l < r), RangeConstraintOnExpression(l, l_rc), RangeConstraintOnExpression(r, r_rc); RangeConstraintOnExpression(e, v_rc.multiple(coeff)) <- LinearExpression(e, v, coeff), RangeConstraintOnVar(v, v_rc); RangeConstraintOnExpression(e, head_rc.combine_sum(&tail_rc)) <- ExpressionSumHeadTail(e, head, tail), RangeConstraintOnExpression(head, head_rc), RangeConstraintOnExpression(tail, tail_rc); RangeConstraintOnExpression(e, RangeConstraint::from_value(value)) <- Constant(e, value); // UpdateRangeConstraintOnVar(v, rc) => rc is a valid range constraint for variable v // This is an output predicate and might cause the rule system to re-run if // the range constraint is better than the currently best known. // Please avoid deriving new range constraints directly since this can easily // lead to exponential behaviour. struct UpdateRangeConstraintOnVar(Var, RangeConstraint); // RC(coeff * var + offset) = rc <=> // coeff * RC(var) + offset = rc <=> // RC(var) = (rc - offset) / coeff UpdateRangeConstraintOnVar(v, rc.combine_sum(&RangeConstraint::from_value(-offset)).multiple(T::one() / coeff)) <- RangeConstraintOnExpression(e, rc), AffineExpression(e, coeff, v, offset), (coeff != T::zero()); // This derives boolean constraints on variables from `v * (v - 1) = 0`, // but also works with `v * (v - 8) = 0` or similar. 
UpdateRangeConstraintOnVar(v, RangeConstraint::from_value(c1).disjunction(&RangeConstraint::from_value(c2))) <- ProductConstraint(_, l, r), (l < r), Solvable(l, v, c1), Solvable(r, v, c2); // BooleanVar(v) => v is 0 or 1 struct BooleanVar(Var); BooleanVar(v) <- RangeConstraintOnVar(v, RangeConstraint::from_mask(1)); // BooleanExpressionConstraint(constr, e) => if constr is satisfied then e = 1 or e = 0 struct BooleanExpressionConstraint(Expr, Expr); BooleanExpressionConstraint(constr, r) <- ProductConstraint(constr, l, r), // l = f * r + c, i.e. constr = (f * r + c) * r = 0 // <=> (r + c / f) * r = 0 // i.e. c / f = -1 <=> c = -f AffinelyRelated(l, f, r, c), (c == -f); //////////////////////// SINGLE-OCCURRENCE VARIABLES ////////////////////////// // Combine multiple variables that only occur in the same algebraic constraint. // // The use-case here is for "diff_inv_marker_..." variables that each are the // inverse of certain variables only if those variables are non-zero // (and arbitrary otherwise). // If the "diff_inv_marker_..." variables only occur once, they are essentially // "free" variables and under some conditions, we can combine them into a single // free variable and thus reduce the number of variables. // // Assume we have an algebraic constraint of the form `X * V1 + Y * V2 = R`, // where `V1` and `V2` only occur in this constraint and only once. // The only combination of values for `X`, `Y` and `R` where this is _not_ satisfiable // is `X = 0`, `Y = 0`, `R != 0`. So the constraint is equivalent to the statement // `(X = 0 and Y = 0) -> R = 0`. // // Consider the simpler case where both `X` and `Y` are non-negative such that // `X + Y` does not wrap. // Then `X = 0 and Y = 0` is equivalent to `X + Y = 0`. So we can replace the constraint // by `(X + Y) * V3 = C`, where `V3` is a new variable that only occurs here. // // For the general case, where e.g. `X` can be negative, we replace it by `X * X`, // if that value is still small enough. 
struct SingleOccurrenceVariable(Var); SingleOccurrenceVariable(v) <- Env(env), for v in env.single_occurrence_variables().cloned(); // SingleOccurrenceVariable(e, v) => v occurs only once in e and e is the // only constraint it appears in. struct SingleOccurrenceVariableInExpr(Expr, Var); SingleOccurrenceVariableInExpr(e, v) <- SingleOccurrenceVariable(v), ContainsVariable(e, v), AlgebraicConstraint(e); // LargestSingleOccurrenceVariablePairInExpr(e, v1, v2) => // v1 and v2 are different variables that only occur in e and only once, // and are the two largest variables with that property in e. struct LargestSingleOccurrenceVariablePairInExpr(Expr, Var, Var); LargestSingleOccurrenceVariablePairInExpr(e, v1, v2) <- Env(env), SingleOccurrenceVariableInExpr(e, v1), SingleOccurrenceVariableInExpr(e, v2), (v1 < v2), (env .single_occurrence_variables() .filter(|v3| env.on_expr(e, (), |e, _| { e.referenced_unknown_variables().any(|v| v == *v3) })) .all(|&v3| v3 == v1 || v3 == v2 || v3 < v1)); // FreeVariableCombinationCandidate(e, coeff1, v1, coeff2, v2, x1, x2) // => e is the expression of an algebraic constraint and // e = coeff1 * v1 * x1 + coeff2 * v2 * x2 + ... // where v1 and v2 are different variables that only occur here and only once. struct FreeVariableCombinationCandidate(Expr, T, Var, Expr, T, Var, Expr); FreeVariableCombinationCandidate(e, coeff1, v1, x1, coeff2, v2, x2) <- // If we only consider the largest variable pair we could miss optimization opportunities, // but at least the replacement becomes deterministic. LargestSingleOccurrenceVariablePairInExpr(e, v1, v2), AlgebraicConstraint(e), HasProductSummand(e, x1, v1_e), LinearExpression(v1_e, v1, coeff1), HasProductSummand(e, x2, v2_e), (x2 != v1_e), (x1 != v2_e), LinearExpression(v2_e, v2, coeff2); ReplaceAlgebraicConstraintBy(e, replacement) <- Env(env), FreeVariableCombinationCandidate(e, coeff1, v1, x1, coeff2, v2, x2), // Here, we have e = coeff1 * v1 * x1 + coeff2 * v2 * x2 + ... 
RangeConstraintOnExpression(x1, rc1), RangeConstraintOnExpression(x2, rc2), let Some(replacement) = (|| { // If the expression is not known to be non-negative, we square it. let square_if_needed = |expr: Expr, rc: RangeConstraint| { let expr = env.extract(expr); if rc.range().0 == T::zero() { (expr, rc) } else { (expr.clone() * expr, rc.square()) } }; let (x1, rc1) = square_if_needed(x1, rc1); let (x2, rc2) = square_if_needed(x2, rc2); if !rc1.range().0.is_zero() || !rc2.range().0.is_zero() { return None; } let sum_rc = rc1.multiple(coeff1).combine_sum(&rc2.multiple(coeff2)); if !(sum_rc.range().0.is_zero() && sum_rc.range().1 < T::from(-1)) { return None; } // Remove the summands with v1 and v2 from the expression. let r = env.extract(e).into_summands().filter(|s|{ if let GroupedExpressionComponent::Quadratic(l, r) = s { let mut vars = l.referenced_unknown_variables().chain(r.referenced_unknown_variables()); if vars.any(|v| v == &v1 || v == &v2) { return false; } }; true }).map(GroupedExpression::from).sum::>(); let factor = x1.clone() * coeff1 + x2.clone() * coeff2; let combined_var = env.new_var("free_var", ComputationMethod::QuotientOrZero(-r.clone(), factor.clone())); let replacement = r + GroupedExpression::from_unknown_variable(combined_var) * factor; Some(env.insert_owned(replacement)) })(); //////////////////// EQUAL ZERO TEST //////////////////////// // PlusMinusResult(e, e1, v2) => // e = e1 * (2 * v2 - 1) struct PlusMinusResult(Expr, Expr, Var); PlusMinusResult(e, e1, v2) <- Product(e, e1, r), AffineExpression(r, coeff, v2, offset), (coeff == T::from(2)), (offset == T::from(-1)); // DiffMarkerConstraint(e, diff_marker, e2, cmp_result, diff_val) => // e = diff_marker * (e2 * (2 * cmp_result - 1) + diff_val) // (up to a factor) struct DiffMarkerConstraint(Expr, Var, Expr, Var, Var); DiffMarkerConstraint(e, diff_marker, e2, cmp_result, diff_val) <- ProductConstraint(e, l, r), LinearExpression(l, diff_marker, _), // Note: the quadratic part has to be the 
head ExpressionSumHeadTail(r, r1, r2), PlusMinusResult(r1, e2, cmp_result), LinearExpression(r2, diff_val, _); // NegatedDiffMarkerConstraint(e, diff_marker, diff_expr, v, result, n) => // e is the constraint diff_marker_expr * (v * (2 * result - 1)) = 0 // and diff_marker_expr is of the form `1 - diff_marker1 - diff_marker2 - ...` // such that we have n variables and there is another // NegatedDiffMarkerConstraint with n-1 variables used to derive this one. struct NegatedDiffMarkerConstraint(Expr, Var, Expr, Var, Var, u32); NegatedDiffMarkerConstraint(e, diff_marker, l, v, result, 0) <- ProductConstraint(e, l, r), AffineExpression(l, T::from(-1), diff_marker, T::from(1)), PlusMinusResult(r, r2, result), LinearExpression(r2, v, T::from(-1)); NegatedDiffMarkerConstraint(e, diff_marker, l, v, result, n + 1) <- ProductConstraint(e, l, r), NegatedDiffMarkerConstraint(_, _, diff_marker_expr2, _, result, n), DifferBySummand(l, diff_marker_expr2, diff_marker_e), LinearExpression(diff_marker_e, diff_marker, T::from(-1)), PlusMinusResult(r, r2, result), LinearExpression(r2, v, T::from(-1)); // NegatedDiffMarkerConstraintFinal(e, diff_marker, l, result, n) => // e is the constraint diff_marker_expr * (result) = 0 // and diff_marker_expr is of the form `1 - diff_marker1 - diff_marker2 - ...` // such that we have n variables and there is another // NegatedDiffMarkerConstraint with n-1 variables used to derive this one. 
struct NegatedDiffMarkerConstraintFinal(Expr, Var, Expr, Var, u32); NegatedDiffMarkerConstraintFinal(e, diff_marker, l, result, n + 1) <- ProductConstraint(e, l, r), NegatedDiffMarkerConstraint(_, _, diff_marker_expr2, _, result, n), DifferBySummand(l, diff_marker_expr2, diff_marker_e), LinearExpression(diff_marker_e, diff_marker, T::from(-1)), LinearExpression(r, result, T::from(1)); struct NegatedDiffMarkerConstraintFinalNegated(Expr, Var, Var, Var, u32); NegatedDiffMarkerConstraintFinalNegated(e, diff_marker, v, result, n + 1) <- ProductConstraint(e, l, r), NegatedDiffMarkerConstraint(_, _, diff_marker_expr2, _, result, n), DifferBySummand(l, diff_marker_expr2, diff_marker_e), LinearExpression(diff_marker_e, diff_marker, T::from(-1)), PlusMinusResult(r, r2, result), AffineExpression(r2, T::from(-1), v, T::from(1)); // EqualZeroCheck(constrs, result, vars) => // constrsexprs can be equivalently replaced by a constraint that models // result = 1 if all vars are zero, and result = 0 otherwise. 
struct EqualZeroCheck([Expr; 10], Var, [Var; 4]); EqualZeroCheck(constrs, result, vars) <- // (1 - diff_marker__3_0) * (a__3_0 * (2 * cmp_result_0 - 1)) = 0 NegatedDiffMarkerConstraint(constr_0, diff_marker_3, _, a_3, result, 0), // (1 - (diff_marker__2_0 + diff_marker__3_0)) * (a__2_0 * (2 * cmp_result_0 - 1)) = 0 NegatedDiffMarkerConstraint(constr_1, diff_marker_2, _, a_2, result, 1), // (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (a__1_0 * (2 * cmp_result_0 - 1)) = 0 NegatedDiffMarkerConstraint(constr_2, diff_marker_1, _, a_1, result, 2), // (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 NegatedDiffMarkerConstraintFinal(constr_3, diff_marker_0, one_minus_diff_marker_sum, result, 3), // (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((1 - a__0_0) * (2 * cmp_result_0 - 1)) = 0 NegatedDiffMarkerConstraintFinalNegated(constr_4, diff_marker_0, a_0, result, 3), // diff_marker__0_0 * ((a__0_0 - 1) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 DiffMarkerConstraint(constr_5, diff_marker_0, a_0_e, result, diff_val), AffineExpression(a_0_e, a_0_e_coeff, a_0, a_0_e_offset), (a_0_e_coeff == T::from(1)), (a_0_e_offset == T::from(-1)), // diff_marker__1_0 * (a__1_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 DiffMarkerConstraint(constr_6, diff_marker_1, a_1_e, result, diff_val), LinearExpression(a_1_e, a_1, T::from(1)), // diff_marker__2_0 * (a__2_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 DiffMarkerConstraint(constr_7, diff_marker_2, a_2_e, result, diff_val), LinearExpression(a_2_e, a_2, T::from(1)), // diff_marker__3_0 * (a__3_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 DiffMarkerConstraint(constr_8, diff_marker_3, a_3_e, result, diff_val), LinearExpression(a_3_e, a_3, T::from(1)), BooleanVar(result), BooleanVar(diff_marker_0), BooleanVar(diff_marker_1), BooleanVar(diff_marker_2), BooleanVar(diff_marker_3), RangeConstraintOnVar(a_0, rc_a0), RangeConstraintOnVar(a_1, 
rc_a1), RangeConstraintOnVar(a_2, rc_a2), RangeConstraintOnVar(a_3, rc_a3), // The next is needed so that the constraint `result + sum_inv_var * sum_of_vars - 1 = 0` // works. If there is a way to get the sum to be zero but not all variables are zero, // then this constraint cannot be satisfied. ( rc_a0.range().0 == T::zero() && rc_a1.range().0 == T::zero() && rc_a2.range().0 == T::zero() && rc_a3.range().0 == T::zero() && rc_a0.combine_sum(&rc_a1).combine_sum(&rc_a2).combine_sum(&rc_a3).range().1 < T::from(-1)), // (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 BooleanExpressionConstraint(constr_9, diff_marker_sum), AffinelyRelated(diff_marker_sum, T::from(-1), one_minus_diff_marker_sum, T::from(1)), let constrs = [constr_0, constr_1, constr_2, constr_3, constr_4, constr_5, constr_6, constr_7, constr_8, constr_9], let vars = [a_0, a_1, a_2, a_3]; ReplaceAlgebraicConstraintsBy(extend_by_none(constrs), extend_by_none(replacement)) <- Env(env), EqualZeroCheck(constrs, result, vars), let replacement = { let result = GroupedExpression::from_unknown_variable(result); assert!(vars.len() == 4); let vars = vars.into_iter().map(|v| GroupedExpression::from_unknown_variable(v)).collect_vec(); let sum_of_vars = vars.iter().cloned().sum::>(); let sum_inv_var = GroupedExpression::from_unknown_variable( env.new_var("inv_of_sum", ComputationMethod::QuotientOrZero(One::one(), sum_of_vars.clone())) ); [ env.insert_owned(result.clone() * sum_of_vars.clone()), env.insert_owned(result + sum_inv_var * sum_of_vars - One::one()), ] }; //////////////// COMBINE CONSTRAINTS WITH NON-NEGATIVE FACTORS ///////////////////// // If we have `x * a = 0` and `x * b = 0` and `a` and `b` are // both non-negative and their sum is constrained, then we can replace // both constraints by `x * (a + b) = 0`. 
ReplaceAlgebraicConstraintsBy(extend_by_none([e1, e2]), replacement) <- Env(env), ProductConstraint(e1, x, a), ProductConstraint(e2, x, b), (e1 < e2), RangeConstraintOnExpression(a, rc_a), RangeConstraintOnExpression(b, rc_b), (rc_a.range().0 == T::zero() && rc_b.range().0 == T::zero() && !rc_a.combine_sum(&rc_b).is_unconstrained()), let replacement = extend_by_none([env.insert_owned(env.extract(x) * (env.extract(a) + env.extract(b)))]); //////////////////////// AFFINE SOLVING ////////////////////////// // Solvable(e, var, value) => (e = 0 => var = value) // Note that e is not required to be a constraint here. struct Solvable(Expr, Var, T); Solvable(e, var, -offset / coeff) <- AffineExpression(e, coeff, var, offset); // Assignment(var, v) => any satisfying assignment has var = v. struct Assignment(Var, T); Assignment(var, v) <- EqualZero(e), Solvable(e, var, v); ///////////////////////////////// NO-WRAP ZERO SUM ////////////////////////// // If an algebraic constraint head + tail = 0 has the following properties: // 1. the range constraint of head is [0, a] with a < P - 1, // 2. the range constraint of tail is [0, b] with b < P - 1, // 3. a + b (as integers) < P - 1, // then both head and tail must be zero. // EntailsZeroHeadAndTail(e1, e2) => e1 = 0 and e2 = 0 struct EntailsZeroHeadAndTail(Expr, Expr); EntailsZeroHeadAndTail(head, tail) <- EqualZero(e), ExpressionSumHeadTail(e, head, tail), RangeConstraintOnExpression(head, rc_head), RangeConstraintOnExpression(tail, rc_tail), (rc_head.range().0 == T::from(0)), (rc_tail.range().0 == T::from(0)), (rc_head.range().1.to_integer() + rc_tail.range().1.to_integer() < T::from(-1).to_integer()); EqualZero(head) <- EntailsZeroHeadAndTail(head,_); EqualZero(tail) <- EntailsZeroHeadAndTail(_, tail); ///////////////////////////////// ONE-HOT FLAG /////////////////////////// // ExactlyOneSet(e) => exactly one variable in e is one, all others are zero. 
struct ExactlyOneSet(Expr); ExactlyOneSet(e) <- AlgebraicConstraint(e), SimpleSum(e, f, c), AffineAndAllVarsBoolean(e), ((f + c).is_zero()); // We want to match expressions of the form f_1 * v_1 + f_2 * v_2 + ... + f_n * v_n + c = 0 // where all v_i are boolean and exactly one of the f_i equals -c. // AffineSumCountCoeffs(e, None, f) => e is an affine expression where // the constant term is -f no variable has the coefficient f. // AffineSumCountCoeffs(e, Some(v), f) => e is an affine expression where // the constant term is -f and exactly one variable has the coefficient f and // that variable is v. struct AffineSumCountCoeffs(Expr, Option, T); AffineSumCountCoeffs(e, None, -c) <- Constant(e, c); AffineSumCountCoeffs(e, Some(v), f) <- AffineSumCountCoeffs(tail, None, f), ExpressionSumHeadTail(e, head, tail), LinearExpression(head, v, f); AffineSumCountCoeffs(e, v1, f) <- AffineSumCountCoeffs(tail, v1, f), ExpressionSumHeadTail(e, head, tail), LinearExpression(head, _, coeff), (coeff != f); Assignment(v, T::from((Some(v) == v2) as u32)) <- ExactlyOneSet(e1), AlgebraicConstraint(e1), BooleanAndSubsetOfVars(e2, e1), // At this point, we know that at most one of the variables in e2 is one, // the rest is zero. AlgebraicConstraint(e2), AffineSumCountCoeffs(e2, v2, _), // At this point, either no variable in e2 has coefficient -c (v2 == None) // or exactly one variable (v2.unwrap()) has coefficient -c. // In any case, the variable equal to v2.unwrap() is one, the rest zero. HasSummand(e2, summand), LinearExpression(summand, v, _); ///////////////////////////////// OUTPUT ACTIONS ////////////////////////// struct Equivalence(Var, Var); //------- quadratic equivalence ----- // QuadraticEquivalenceCandidate(E, expr, offset) => // E = (expr * (expr + offset) = 0) is a constraint and // expr is affine with at least 2 variables. 
struct QuadraticEquivalenceCandidate(Expr, Expr, T); QuadraticEquivalenceCandidate(e, r, o / f) <- Env(env), ProductConstraint(e, l, r), AffinelyRelated(l, f, r, o), // l = f * r + o IsAffine(l), ({env.affine_var_count(l).unwrap_or(0) > 1}); // QuadraticEquivalenceCandidatePair(expr1, expr2, offset1 / coeff, v1, v2) => // (expr1) * (expr1 + offset1) = 0 and (expr2) * (expr2 + offset2) = 0 are constraints, // expr1 is affine with at least 2 variables and is obtained from // expr2 * factor by substituting v2 by v1 (factor != 0), // offset1 == offset2 * factor and coeff is the coefficient of v1 in expr1. // // This means that v1 is always equal to (-expr1 / coeff) or equal to // (-(expr1 + offset1) / coeff) = (-expr1 / coeff - offset1 / coeff). // Because of the above, also v2 is equal to // (-expr1 / coeff) or equal to (-(expr1 + offset1) / coeff) [Yes, expr1!]. struct QuadraticEquivalenceCandidatePair(Expr, Expr, T, Var, Var); QuadraticEquivalenceCandidatePair(expr1, expr2, offset1 / coeff, v1, v2) <- Env(env), QuadraticEquivalenceCandidate(_, expr1, offset1), QuadraticEquivalenceCandidate(_, expr2, offset2), (expr1 < expr2), let Some((v1, v2, factor)) = env.differ_in_exactly_one_variable(expr1, expr2), (offset1 == offset2 * factor), let coeff = env.on_expr(expr1, (), |e, _| *e.coefficient_of_variable_in_affine_part(&v1).unwrap()); // QuadraticEquivalence(v1, v2) => v1 and v2 are equal in all satisfying assignments. // Because of QuadraticEquivalenceCandidatePair, v1 is equal to X or X + offset, // where X is some value that depends on other variables. Similarly, v2 is equal to X or X + offset. // Because of the range constraints of v1 and v2, these two "or"s are exclusive ors. // This means depending on the value of X, it is either X or X + offset. // Since this "decision" only depens on X, both v1 and v2 are either X or X + offset at the same time // and thus equal. 
struct QuadraticEquivalence(Var, Var); QuadraticEquivalence(v1, v2) <- QuadraticEquivalenceCandidatePair(_, _, offset, v1, v2), RangeConstraintOnVar(v1, rc), RangeConstraintOnVar(v2, rc), (rc.is_disjoint(&rc.combine_sum(&RangeConstraint::from_value(offset)))); Equivalence(v1, v2) <- QuadraticEquivalence(v1, v2); @output pub struct ActionRule(pub Action); ActionRule(Action::UpdateRangeConstraintOnVar(v, rc)) <- UpdateRangeConstraintOnVar(v, rc); ActionRule(Action::SubstituteVariableByConstant(v, val)) <- Assignment(v, val); // Substitute the larger variable by the smaller. ActionRule(Action::SubstituteVariableByVariable(v1, v2)) <- Equivalence(v1, v2), (v1 > v2); ActionRule(Action::SubstituteVariableByVariable(v2, v1)) <- Equivalence(v1, v2), (v2 > v1); ActionRule(Action::ReplaceAlgebraicConstraintBy(e1, e2)) <- ReplaceAlgebraicConstraintBy(e1, e2); @output pub struct ReplaceConstraintsActionRule(pub ReplaceConstraintsAction); ReplaceConstraintsActionRule(ReplaceConstraintsAction{ to_replace, replace_by }) <- ReplaceAlgebraicConstraintsBy(to_replace, replace_by); } fn extend_by_none(items: [Expr; N1]) -> [Option; N2] { let mut output = [None; N2]; for (i, item) in items.iter().enumerate() { output[i] = Some(*item); } output } ================================================ FILE: constraint-solver/src/rule_based_optimizer/tests.rs ================================================ use std::fmt::Display; use std::hash::Hash; use crate::bus_interaction_handler::DefaultBusInteractionHandler; use crate::rule_based_optimizer::driver::{batch_replace_algebraic_constraints, ReplacementAction}; use crate::{ algebraic_constraint, constraint_system::{BusInteraction, BusInteractionHandler}, grouped_expression::{GroupedExpression, NoRangeConstraints}, indexed_constraint_system::IndexedConstraintSystem, range_constraint::RangeConstraint, rule_based_optimizer::driver::rule_based_optimization, solver::Solver, }; use expect_test::expect; use itertools::Itertools; use num_traits::Zero; 
use powdr_number::{BabyBearField, FieldElement, LargeInt};

/// Wraps an expression into an algebraic constraint asserting that it equals zero.
fn assert_zero(
    expr: GroupedExpression,
) -> algebraic_constraint::AlgebraicConstraint> {
    algebraic_constraint::AlgebraicConstraint::assert_zero(expr)
}

/// Shorthand: builds an unknown-variable expression named `name`.
fn v(name: &str) -> GroupedExpression {
    GroupedExpression::from_unknown_variable(name.to_string())
}

/// Shorthand: builds a constant (BabyBear field element) expression from `value`.
fn c(value: i64) -> GroupedExpression {
    GroupedExpression::from_number(BabyBearField::from(value))
}

/// Returns a closure dispensing fresh variable names of the form
/// `<prefix>_<counter>`; a single counter is shared across all prefixes,
/// so every dispensed name is unique.
fn new_var() -> impl FnMut(&str) -> String {
    let mut counter = 0;
    move |prefix: &str| {
        let name = format!("{prefix}_{counter}");
        counter += 1;
        name
    }
}

/// Models the variable range checker bus for tests: given payload
/// `(x, bits)`, returns the range constraints implied for the two
/// payload entries.
fn handle_variable_range_checker(
    payload: &[RangeConstraint],
) -> Vec> {
    const MAX_BITS: u64 = 25;
    // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/var_range/bus.rs
    // Expects (x, bits), where `x` is in the range [0, 2^bits - 1]
    let [_x, bits] = payload else {
        panic!("Expected arguments (x, bits)");
    };
    match bits.try_to_single_value() {
        // `bits` is a known constant small enough to express as a bit mask:
        // constrain `x` to [0, 2^bits - 1] and keep `bits` fixed.
        Some(bits_value) if bits_value.to_degree() <= MAX_BITS => {
            let bits_value = bits_value.to_integer().try_into_u64().unwrap();
            let mask = (1u64 << bits_value) - 1;
            vec![RangeConstraint::from_mask(mask), *bits]
        }
        // `bits` is not a known constant (or too large): fall back to the
        // widest ranges the bus permits for both payload entries.
        _ => {
            vec![
                RangeConstraint::from_mask((1u64 << MAX_BITS) - 1),
                RangeConstraint::from_range(T::from(0), T::from(MAX_BITS)),
            ]
        }
    }
}

/// Tries to interpret a bus interaction whose multiplicity and bus id are
/// known constants. Returns `None` if either is unknown or if the
/// multiplicity is zero (interaction disabled).
fn try_handle_bus_interaction(
    bus_interaction: &BusInteraction>,
) -> Option>> {
    let mult = bus_interaction.multiplicity.try_to_single_value()?;
    if mult == Zero::zero() {
        return None;
    }
    let bus_id = bus_interaction
        .bus_id
        .try_to_single_value()?
.to_integer() .try_into_u64()?; let payload_constraints = match bus_id { 3 => handle_variable_range_checker(&bus_interaction.payload), _ => return None, }; Some(BusInteraction { payload: payload_constraints, ..bus_interaction.clone() }) } #[derive(Clone)] #[allow(dead_code)] struct TestBusInteractionHandler; impl BusInteractionHandler for TestBusInteractionHandler { fn handle_bus_interaction( &self, bus_interaction: BusInteraction>, ) -> BusInteraction> { try_handle_bus_interaction(&bus_interaction).unwrap_or(bus_interaction) } } #[allow(dead_code)] fn bit_constraint( variable: &str, bits: u32, ) -> BusInteraction> { BusInteraction { bus_id: c(3), payload: vec![v(variable), c(bits as i64)], multiplicity: c(1), } } #[test] fn test_rule_based_optimization_empty() { let system: IndexedConstraintSystem = IndexedConstraintSystem::default(); let optimized_system = rule_based_optimization( system, NoRangeConstraints, DefaultBusInteractionHandler::default(), &mut new_var(), None, ); assert_eq!(optimized_system.0.system().algebraic_constraints.len(), 0); } #[test] fn test_rule_based_optimization_simple_assignment() { let mut system = IndexedConstraintSystem::default(); let x = v("x"); system.add_algebraic_constraints([ assert_zero(x * BabyBearField::from(7) - c(21)), assert_zero(v("y") * (v("y") - c(1)) - v("x")), ]); let optimized_system = rule_based_optimization( system, NoRangeConstraints, DefaultBusInteractionHandler::default(), &mut new_var(), None, ); expect!["(y) * (y - 1) - 3 = 0"].assert_eq(&optimized_system.0.to_string()); } #[test] fn add_with_carry() { // This tests a case of equivalent constraints that appear in the // way "add with carry" is performed in openvm. // X and Y end up being equivalent because they are both either // A or A - 256, depending on whether the value of A is between // 0 and 255 or not. // A is the result of an addition with carry. 
let mut system = IndexedConstraintSystem::default(); system.add_algebraic_constraints([ assert_zero( (v("X") * c(7) - v("A") * c(7) + c(256) * c(7)) * (v("X") * c(7) - v("A") * c(7)), ), assert_zero((v("Y") - v("A") + c(256)) * (v("Y") - v("A"))), ]); system.add_bus_interactions([bit_constraint("X", 8), bit_constraint("Y", 8)]); let optimized_system = rule_based_optimization( system, NoRangeConstraints, TestBusInteractionHandler, &mut new_var(), None, ); // Y has been replaced by X expect![[r#" (7 * A - 7 * X - 1792) * (7 * A - 7 * X) = 0 (A - X - 256) * (A - X) = 0 BusInteraction { bus_id: 3, multiplicity: 1, payload: X, 8 } BusInteraction { bus_id: 3, multiplicity: 1, payload: X, 8 }"#]] .assert_eq(&optimized_system.0.to_string()); } #[test] fn test_rule_based_optimization_quadratic_equality() { let mut system = IndexedConstraintSystem::default(); system.add_algebraic_constraints([ assert_zero( (c(30720) * v("rs1_data__0_1") + c(7864320) * v("rs1_data__1_1") - c(30720) * v("mem_ptr_limbs__0_1") + c(737280)) * (c(30720) * v("rs1_data__0_1") + c(7864320) * v("rs1_data__1_1") - c(30720) * v("mem_ptr_limbs__0_1") + c(737281)), ), assert_zero( (c(30720) * v("rs1_data__0_1") + c(7864320) * v("rs1_data__1_1") - c(30720) * v("mem_ptr_limbs__0_2") + c(737280)) * (c(30720) * v("rs1_data__0_1") + c(7864320) * v("rs1_data__1_1") - c(30720) * v("mem_ptr_limbs__0_2") + c(737281)), ), ]); system.add_bus_interactions([ bit_constraint("rs1_data__0_1", 8), bit_constraint("rs1_data__1_1", 8), BusInteraction { bus_id: c(3), multiplicity: c(1), payload: vec![c(-503316480) * v("mem_ptr_limbs__0_1"), c(14)], }, BusInteraction { bus_id: c(3), multiplicity: c(1), payload: vec![c(-503316480) * v("mem_ptr_limbs__0_2"), c(14)], }, ]); let optimized_system = rule_based_optimization( system, NoRangeConstraints, TestBusInteractionHandler, &mut new_var(), None, ); // Note that in the system below, mem_ptr_limbs__0_2 has been eliminated expect![[r#" (30720 * mem_ptr_limbs__0_1 - 30720 * 
rs1_data__0_1 - 7864320 * rs1_data__1_1 - 737280) * (30720 * mem_ptr_limbs__0_1 - 30720 * rs1_data__0_1 - 7864320 * rs1_data__1_1 - 737281) = 0 (30720 * mem_ptr_limbs__0_1 - 30720 * rs1_data__0_1 - 7864320 * rs1_data__1_1 - 737280) * (30720 * mem_ptr_limbs__0_1 - 30720 * rs1_data__0_1 - 7864320 * rs1_data__1_1 - 737281) = 0 BusInteraction { bus_id: 3, multiplicity: 1, payload: rs1_data__0_1, 8 } BusInteraction { bus_id: 3, multiplicity: 1, payload: rs1_data__1_1, 8 } BusInteraction { bus_id: 3, multiplicity: 1, payload: -(503316480 * mem_ptr_limbs__0_1), 14 } BusInteraction { bus_id: 3, multiplicity: 1, payload: -(503316480 * mem_ptr_limbs__0_1), 14 }"#]].assert_eq(&optimized_system.0.to_string()); } #[test] fn test_batch_replace_with_duplicate_constraints() { // Direct test of batch_replace_algebraic_constraints with duplicate constraints // This verifies that the HashSet-based tracking correctly handles duplicates let mut system: IndexedConstraintSystem = IndexedConstraintSystem::default(); // Create a system with duplicate constraints system.add_algebraic_constraints([ assert_zero(v("x") + v("y")), assert_zero(v("x") + v("y")), assert_zero(v("z") - c(5)), ]); assert_eq!(system.system().algebraic_constraints.len(), 3); // Replace "x + y = 0" and "z - 5 = 0" by "a = 0" let replacements = vec![ReplacementAction { replace: vec![v("x") + v("y"), v("z") - c(5)], replace_by: vec![v("a")], }]; // Try to apply the replacement let result = batch_replace_algebraic_constraints(&mut system, replacements, None); // The replacement should succeed because we found the constraint to replace (even though it appears twice) assert!(result, "Replacement should succeed"); expect!["a = 0"].assert_eq( &system .system() .algebraic_constraints .iter() .format("\n") .to_string(), ); } #[test] fn test_batch_replace_with_duplicate_constraints2() { let mut system: IndexedConstraintSystem = IndexedConstraintSystem::default(); system.add_algebraic_constraints([assert_zero(v("x") + v("y")), 
assert_zero(v("z") - c(5))]); // Replacement has "x + y" twice, should get reduced to just a single one. let replacements = vec![ReplacementAction { replace: vec![v("x") + v("y"), v("x") + v("y")], replace_by: vec![v("a")], }]; let result = batch_replace_algebraic_constraints(&mut system, replacements, None); assert!(result, "Replacement should succeed"); expect![[r#" z - 5 = 0 a = 0"#]] .assert_eq( &system .system() .algebraic_constraints .iter() .format("\n") .to_string(), ); } #[test] fn test_batch_replace_with_duplicate_constraints3() { let mut system: IndexedConstraintSystem = IndexedConstraintSystem::default(); system.add_algebraic_constraints([ // x + y is contained twice, both should be replaced. assert_zero(v("x") + v("y")), assert_zero(v("x") + v("y")), assert_zero(v("z") - c(5)), ]); let replacements = vec![ReplacementAction { replace: vec![v("x") + v("y")], replace_by: vec![v("a")], }]; let result = batch_replace_algebraic_constraints(&mut system, replacements, None); assert!(result, "Replacement should succeed"); expect![[r#" z - 5 = 0 a = 0"#]] .assert_eq( &system .system() .algebraic_constraints .iter() .format("\n") .to_string(), ); } #[test] fn test_batch_replace_with_conflict() { let mut system: IndexedConstraintSystem = IndexedConstraintSystem::default(); system.add_algebraic_constraints([assert_zero(v("x") + v("y")), assert_zero(v("z") - c(5))]); // both actions need "x + y", only the first can proceed let replacements = vec![ ReplacementAction { replace: vec![v("x") + v("y")], replace_by: vec![v("a")], }, ReplacementAction { replace: vec![v("x") + v("y"), v("z") - c(5)], replace_by: vec![v("b")], }, ]; let result = batch_replace_algebraic_constraints(&mut system, replacements, None); assert!(result, "Replacement should succeed"); expect![[r#" z - 5 = 0 a = 0"#]] .assert_eq( &system .system() .algebraic_constraints .iter() .format("\n") .to_string(), ); let mut system: IndexedConstraintSystem = IndexedConstraintSystem::default(); 
system.add_algebraic_constraints([assert_zero(v("x") + v("y")), assert_zero(v("z") - c(5))]); // both actions need "x + y", only the first can proceed, now reverse order. let replacements = vec![ ReplacementAction { replace: vec![v("x") + v("y"), v("z") - c(5)], replace_by: vec![v("b")], }, ReplacementAction { replace: vec![v("x") + v("y")], replace_by: vec![v("a")], }, ]; let result = batch_replace_algebraic_constraints(&mut system, replacements, None); assert!(result, "Replacement should succeed"); expect!["b = 0"].assert_eq( &system .system() .algebraic_constraints .iter() .format("\n") .to_string(), ); } #[test] fn test_rule_split_constraints_based_on_minimal_range() { let mut system = IndexedConstraintSystem::default(); //opcode_sub_flag_21 + 2 * opcode_xor_flag_21 + 3 * opcode_or_flag_21 + 4 * opcode_and_flag_21 = 0 system.add_algebraic_constraints([assert_zero( v("opcode_sub_flag_21") + c(2) * v("opcode_xor_flag_21") + c(3) * v("opcode_or_flag_21") + c(4) * v("opcode_and_flag_21"), )]); let range_constraints = std::collections::HashMap::from([ ("opcode_sub_flag_21", RangeConstraint::from_mask(0x1u32)), ("opcode_xor_flag_21", RangeConstraint::from_mask(0x1u32)), ("opcode_or_flag_21", RangeConstraint::from_mask(0x1u32)), ("opcode_and_flag_21", RangeConstraint::from_mask(0x1u32)), ]); let mut solver = crate::solver::new_solver( system.clone().into(), DefaultBusInteractionHandler::default(), ); #[allow(clippy::iter_over_hash_type)] for (var, constraint) in range_constraints { solver.add_range_constraint(&var.to_string(), constraint); } let optimized_system = rule_based_optimization( system, solver, DefaultBusInteractionHandler::default(), &mut new_var(), None, ); assert_eq!(optimized_system.0.system().algebraic_constraints.len(), 0); } #[test] fn one_hot_flags() { let mut system = IndexedConstraintSystem::default(); //opcode_sub_flag_21 + 2 * opcode_xor_flag_21 + 3 * opcode_or_flag_21 + 4 * opcode_and_flag_21 = 0 system.add_algebraic_constraints([ // Boolean 
flags assert_zero(v("flag0") * (v("flag0") - c(1))), assert_zero(v("flag1") * (v("flag1") - c(1))), assert_zero(v("flag2") * (v("flag2") - c(1))), assert_zero(v("flag3") * (v("flag3") - c(1))), // Exactly one flag is active assert_zero(v("flag0") + v("flag1") + v("flag2") + v("flag3") - c(1)), // Flag 2 is active assert_zero( v("flag0") * c(0) + v("flag1") * c(1) + v("flag2") * c(2) + v("flag3") * c(3) - c(2), ), assert_zero(v("flag0") * (v("x") - v("y"))), assert_zero(v("flag2") * (v("r") - v("t"))), ]); let optimized_system = rule_based_optimization( system, NoRangeConstraints, DefaultBusInteractionHandler::default(), &mut new_var(), None, ); expect!["r - t = 0"].assert_eq(&optimized_system.0.to_string()); } ================================================ FILE: constraint-solver/src/rule_based_optimizer/types.rs ================================================ use std::fmt::Display; use derive_more::{From, Into}; use powdr_number::FieldElement; use crate::range_constraint::RangeConstraint; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, From, Into)] pub struct Var(usize); impl Display for Var { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "v_{}", self.0) } } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, From, Into)] pub struct Expr(usize); #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Action { UpdateRangeConstraintOnVar(Var, RangeConstraint), SubstituteVariableByConstant(Var, T), /// Substitute the first variable by the second. SubstituteVariableByVariable(Var, Var), /// Replace one algebraic constraint by another. ReplaceAlgebraicConstraintBy(Expr, Expr), } /// Replace a list of algebraic constraints by another list of /// algebraic constraints. We use an array of Option instead of /// a Vec because this type needs to be `Copy`. /// This is a separate type from `Action` because it is much larger. 
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ReplaceConstraintsAction { /// The constraints to be replaced. Up to 10, increase the size if needed. pub to_replace: [Option; 10], /// The constraints to replace by. Up to 5, increase the size if needed. pub replace_by: [Option; 5], } ================================================ FILE: constraint-solver/src/runtime_constant.rs ================================================ use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub}; use num_traits::{One, Zero}; use powdr_number::FieldElement; use crate::range_constraint::RangeConstraint; /// Represents a run-time constant in the constraint solver, built over /// a base field type. /// The base field type itself (i.e. any T: FieldElement) represents a run-time constant /// (which is also a compile-time constant), but the trait lets us represent run-time /// constants symbolically as well. pub trait RuntimeConstant: Sized + Neg + Clone + From + Add + AddAssign + Sub + Mul + MulAssign + PartialEq + Eq + Zero + One { type FieldType: FieldElement; /// Tries to convert the constant to a single number. This always works for compile-time constants. fn try_to_number(&self) -> Option; /// Returns the range constraint for this constant. For compile-time constants, /// this will be a single value range constraint. fn range_constraint(&self) -> RangeConstraint; /// Divides this constant by another constant, returning a new constant. fn field_div(&self, other: &Self) -> Self { self.clone() * other.field_inverse() } /// Returns the multiplicative inverse of this constant. fn field_inverse(&self) -> Self; /// Converts a u64 to a run-time constant. fn from_u64(k: u64) -> Self { Self::from(Self::FieldType::from(k)) } /// Returns whether this constant is known to be zero at compile time. fn is_known_zero(&self) -> bool { self.try_to_number().is_some_and(|n| n.is_zero()) } /// Returns whether this constant is known to be one at compile time. 
fn is_known_one(&self) -> bool {
    self.try_to_number().is_some_and(|n| n.is_one())
}

/// Returns whether this constant is known to be -1 at compile time.
fn is_known_minus_one(&self) -> bool {
    self.try_to_number()
        .is_some_and(|n| n == -Self::FieldType::from(1))
}

/// Returns whether this constant is known to be non-zero at compile time.
/// Note that this could return true even if the constant is not known fully
/// at compile time, but it is guaranteed that the constant is not zero.
fn is_known_nonzero(&self) -> bool {
    // Only checking range constraint is enough since if this is a known
    // fixed value, we will get a range constraint with just a single value.
    !self.range_constraint().allows_value(0.into())
}
}

pub trait Substitutable {
    /// Substitutes a variable with another constant.
    fn substitute(&mut self, variable: &V, substitution: &Self);
}

/// Provides a function to transform the type of variables in an expression.
/// The expectation is that the variable transformation function is injective, i.e.
/// two different variables cannot become equal through the transformation.
pub trait VarTransformable {
    type Transformed;

    /// Transforms `self` by applying the `var_transform` function to all variables.
    fn transform_var_type(&self, var_transform: &mut impl FnMut(&V1) -> V2) -> Self::Transformed {
        // Default implementation delegates to the fallible variant with an
        // always-`Some` transformation, so implementors only write one method.
        self.try_transform_var_type(&mut |v| Some(var_transform(v)))
            .unwrap()
    }

    /// Fallible variant: returns `None` as soon as `var_transform` returns
    /// `None` for any variable in `self`.
    fn try_transform_var_type(
        &self,
        var_transform: &mut impl FnMut(&V1) -> Option,
    ) -> Option;
}

// A plain field element is itself a run-time (and compile-time) constant.
impl RuntimeConstant for T {
    type FieldType = T;

    fn try_to_number(&self) -> Option {
        // A field element is always a concrete number.
        Some(*self)
    }

    fn range_constraint(&self) -> RangeConstraint {
        // A known value yields a single-value range constraint.
        RangeConstraint::from_value(*self)
    }

    fn field_div(&self, other: &Self) -> Self {
        *self / *other
    }

    fn field_inverse(&self) -> Self {
        T::from(1) / *self
    }
}

impl Substitutable for T {
    fn substitute(&mut self, _variable: &V, _substitution: &Self) {
        // No-op for numbers.
} } impl VarTransformable for T { type Transformed = T; fn transform_var_type(&self, _var_transform: &mut impl FnMut(&V1) -> V2) -> Self::Transformed { // No variables to transform. *self } fn try_transform_var_type( &self, _var_transform: &mut impl FnMut(&V1) -> Option, ) -> Option { // No variables to transform. Some(*self) } } ================================================ FILE: constraint-solver/src/solver/base.rs ================================================ use derivative::Derivative; use itertools::Itertools; use powdr_number::FieldElement; use crate::constraint_system::{ AlgebraicConstraint, BusInteraction, BusInteractionHandler, ConstraintRef, }; use crate::effect::Effect; use crate::grouped_expression::{GroupedExpression, RangeConstraintProvider}; use crate::indexed_constraint_system::IndexedConstraintSystemWithQueue; use crate::range_constraint::RangeConstraint; use crate::solver::boolean_extractor::BooleanExtractor; use crate::solver::constraint_splitter::try_split_constraint; use crate::solver::linearizer::Linearizer; use crate::solver::var_transformation::Variable; use crate::solver::{exhaustive_search, Error, Solver, VariableAssignment}; use crate::utils::possible_concrete_values; use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::Display; use std::hash::Hash; use std::iter::once; /// Given a list of constraints, tries to derive as many variable assignments as possible. /// /// It contains two main components that transform constraints: The boolean extractor and the linearizer. /// /// The boolean extractor is run first and tries to turn quadratic constraints into affine constraints by /// introducing new boolean variables. /// /// The linearizer is run second and replaces all non-affine sub-components of constraints by new variables. /// It also replaces bus interaction fields by new variables. /// /// For both of these transforming components, the original constraints are also kept unmodified. 
pub struct BaseSolver { /// The constraint system to solve. During the solving process, any expressions will /// be simplified as much as possible. constraint_system: IndexedConstraintSystemWithQueue, /// The handler for bus interactions. bus_interaction_handler: BusInterHandler, /// The currently known range constraints of the variables. range_constraints: RangeConstraints, /// The concrete variable assignments or replacements that were derived for variables /// that do not occur in the constraints any more. /// This is cleared with every call to `solve()`. assignments_to_return: Vec>, /// A cache of expressions that are equivalent to a given expression. equivalent_expressions_cache: HashMap, Vec>>, /// A dispenser for fresh variables. var_dispenser: VarDisp, /// The boolean extraction component. boolean_extractor: BooleanExtractor, /// The linearizing component. linearizer: Linearizer, } pub trait VarDispenser { /// Returns a fresh new variable of kind "boolean". fn next_boolean(&mut self) -> V; /// Returns a fresh new variable of kind "linear". fn next_linear(&mut self) -> V; /// Returns an iterator over all variables of kind "linear" dispensed in the past. fn all_linearized_vars(&self) -> impl Iterator; } #[derive(Default)] pub struct VarDispenserImpl { next_boolean_id: usize, next_linearized_id: usize, } impl VarDispenser> for VarDispenserImpl { fn next_boolean(&mut self) -> Variable { let id = self.next_boolean_id; self.next_boolean_id += 1; Variable::Boolean(id) } fn next_linear(&mut self) -> Variable { let id = self.next_linearized_id; self.next_linearized_id += 1; Variable::Linearized(id) } /// Returns an iterator over all linearized variables dispensed in the past. 
fn all_linearized_vars(&self) -> impl Iterator> { (0..self.next_linearized_id).map(Variable::Linearized) } } impl BaseSolver { pub fn new(bus_interaction_handler: B) -> Self { BaseSolver { constraint_system: Default::default(), range_constraints: Default::default(), assignments_to_return: Default::default(), equivalent_expressions_cache: Default::default(), var_dispenser: Default::default(), boolean_extractor: Default::default(), linearizer: Default::default(), bus_interaction_handler, } } } impl RangeConstraintProvider for BaseSolver where V: Clone + Hash + Eq, T: FieldElement, { fn get(&self, var: &V) -> RangeConstraint { self.range_constraints.get(var) } } impl Display for BaseSolver { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.constraint_system) } } impl, VD: VarDispenser> Solver for BaseSolver where V: Ord + Clone + Hash + Eq + Display, T: FieldElement, { fn solve(&mut self) -> Result>, Error> { self.equivalent_expressions_cache.clear(); self.loop_until_no_progress()?; let assignments = std::mem::take(&mut self.assignments_to_return); // Apply the deduced assignments to the substitutions we performed // while linearizing and boolean extracting. // We assume that the user of the solver applies the assignments to // their expressions and thus "incoming" expressions used in the functions // `range_constraint_for_expression` and `are_expressions_known_to_be_different` // will have the assignments applied. self.linearizer.apply_assignments(&assignments); self.boolean_extractor.apply_assignments(&assignments); Ok(assignments) } fn add_algebraic_constraints( &mut self, constraints: impl IntoIterator>>, ) { self.equivalent_expressions_cache.clear(); let constraints = constraints .into_iter() .filter(|c| !c.is_redundant()) .flat_map(|constr| { self.try_extract_boolean(constr.as_ref()) .into_iter() .chain(std::iter::once(constr)) }) // needed because of unique access to the var dispenser / self. 
.collect_vec() .into_iter() .flat_map(|constr| self.linearize_constraint(constr)) .collect_vec(); self.constraint_system .add_algebraic_constraints(constraints.into_iter().filter(|c| !c.is_redundant())); } fn add_bus_interactions( &mut self, bus_interactions: impl IntoIterator>>, ) { self.equivalent_expressions_cache.clear(); let mut constraints_to_add = vec![]; let bus_interactions = bus_interactions .into_iter() .map(|bus_interaction| { self.linearize_bus_interaction(bus_interaction, &mut constraints_to_add) }) .collect_vec(); // We only substituted by a variable, but the substitution was not yet linearized. self.add_algebraic_constraints(constraints_to_add); self.constraint_system .add_bus_interactions(bus_interactions); } fn add_range_constraint(&mut self, variable: &V, constraint: RangeConstraint) { self.equivalent_expressions_cache.clear(); self.apply_range_constraint_update(variable, constraint); } fn retain_variables(&mut self, variables_to_keep: &HashSet) { self.equivalent_expressions_cache.clear(); assert!(self.assignments_to_return.is_empty()); // There are constraints that only contain `Variable::Linearized` that // connect quadratic terms with the original constraints. We could try to find // those, but let's just keep all of them for now. 
let mut variables_to_keep = variables_to_keep.clone(); variables_to_keep.extend(self.var_dispenser.all_linearized_vars()); self.constraint_system.retain_algebraic_constraints(|c| { c.referenced_unknown_variables() .any(|v| variables_to_keep.contains(v)) }); self.constraint_system .retain_bus_interactions(|bus_interaction| { bus_interaction .referenced_unknown_variables() .any(|v| variables_to_keep.contains(v)) }); let remaining_variables = self .constraint_system .system() .referenced_unknown_variables() .collect::>(); self.range_constraints .range_constraints .retain(|v, _| remaining_variables.contains(v)); } fn range_constraint_for_expression( &self, expr: &GroupedExpression, ) -> RangeConstraint { self.linearizer .internalized_versions_of_expression(expr) .fold(RangeConstraint::default(), |acc, expr| { acc.conjunction(&expr.range_constraint(self)) }) } fn try_to_equivalent_constant(&self, expr: &GroupedExpression) -> Option { self.linearizer .internalized_versions_of_expression(expr) .filter_map(|e| e.try_to_number()) .next() } fn are_expressions_known_to_be_different( &mut self, a: &GroupedExpression, b: &GroupedExpression, ) -> bool { if let (Some(a), Some(b)) = (a.try_to_known(), b.try_to_known()) { return a != b; } let equivalent_to_a = self.equivalent_expressions(a); let equivalent_to_b = self.equivalent_expressions(b); equivalent_to_a .iter() .cartesian_product(&equivalent_to_b) .any(|(a_eq, b_eq)| { possible_concrete_values(&(a_eq - b_eq), self, 20) .is_some_and(|mut values| values.all(|value| !value.is_zero())) }) } } impl, VD: VarDispenser> BaseSolver where V: Ord + Clone + Hash + Eq + Display, T: FieldElement, { /// Tries to performs boolean extraction on `constr`, i.e. tries to turn quadratic constraints into affine constraints /// by introducing new boolean variables. 
    fn try_extract_boolean(
        &mut self,
        constr: AlgebraicConstraint<&GroupedExpression>,
    ) -> Option>> {
        let result = self
            .boolean_extractor
            .try_extract_boolean(constr, || self.var_dispenser.next_boolean())?;
        if let Some(var) = result.new_unconstrained_boolean_variable {
            // If we created a boolean variable, we constrain it to be boolean.
            self.add_range_constraint(&var, RangeConstraint::from_mask(1));
        }
        Some(result.constraint)
    }

    /// Performs linearization of `constr`, i.e. replaces all non-affine sub-components of the constraint
    /// by new variables.
    /// This function will always return the original constraint as well as the linearized constraints
    /// and equivalences needed after linearization.
    fn linearize_constraint(
        &mut self,
        constr: AlgebraicConstraint>,
    ) -> impl Iterator>> {
        // The original constraint is always kept alongside its linearized form.
        let mut constrs = vec![constr.clone()];
        if !constr.expression.is_affine() {
            // The linearizer collects the auxiliary (defining) constraints into
            // `constrs` and returns the linearized expression itself.
            let linearized = self.linearizer.linearize_expression(
                constr.expression,
                &mut || self.var_dispenser.next_linear(),
                &mut constrs,
            );
            constrs.push(AlgebraicConstraint::assert_zero(linearized));
        }
        constrs.into_iter()
    }

    /// Replaces all bus interaction fields by new variables.
    /// Adds the equality constraint to `constraint_collection` and returns the modified
    /// bus interaction.
    ///
    /// Note that the constraints added to `constraint_collection` are not yet boolean-extracted or linearized.
    fn linearize_bus_interaction(
        &mut self,
        bus_interaction: BusInteraction>,
        constraint_collection: &mut Vec>>,
    ) -> BusInteraction> {
        bus_interaction
            .fields()
            .map(|expr| {
                // Each field is substituted by a fresh "linear" variable; the
                // connecting equality constraint goes into `constraint_collection`.
                self.linearizer.substitute_by_var(
                    expr.clone(),
                    &mut || self.var_dispenser.next_linear(),
                    constraint_collection,
                )
            })
            .collect()
    }
}

// NOTE(review): the impl header below reads `impl, VD>` — generic parameters were
// lost in extraction; restore from version control.
impl, VD> BaseSolver
where
    V: Ord + Clone + Hash + Eq + Display,
    T: FieldElement,
    VD: VarDispenser,
{
    /// Main solver loop: alternates cheap per-constraint solving with the more
    /// expensive exhaustive search until neither step makes progress.
    fn loop_until_no_progress(&mut self) -> Result<(), Error> {
        loop {
            let mut progress = false;
            // Try solving constraints in isolation.
progress |= self.solve_in_isolation()?; if !progress { // This might be expensive, so we only do it if we made no progress // in the previous steps. progress |= self.exhaustive_search()?; } if !progress { break; } } Ok(()) } /// Tries to make progress by solving each constraint in isolation. fn solve_in_isolation(&mut self) -> Result { let mut progress = false; while let Some(item) = self.constraint_system.pop_front() { let effects = match item { ConstraintRef::AlgebraicConstraint(c) => { if let Some((v1, expr)) = try_to_simple_equivalence(c) { self.apply_assignment(&v1, &expr); continue; } let effects = c .solve(&self.range_constraints) .map_err(Error::AlgebraicSolverError)? .effects; if let Some(components) = try_split_constraint(&c, &self.range_constraints) { progress |= self.add_algebraic_constraints_if_new(components); } effects } ConstraintRef::BusInteraction(b) => b .solve(&self.bus_interaction_handler, &self.range_constraints) .map_err(|_| Error::BusInteractionError)?, }; for effect in effects { progress |= self.apply_effect(effect); } } Ok(progress) } /// Find groups of variables with a small set of possible assignments. /// For each group, performs an exhaustive search in the possible assignments /// to deduce new range constraints (also on other variables). /// This might be expensive. fn exhaustive_search(&mut self) -> Result { log::debug!("Starting exhaustive search..."); let mut variable_sets = exhaustive_search::get_brute_force_candidates(self.constraint_system.system(), &*self) .collect_vec(); // Start with small sets to make larger ones redundant after some assignments. variable_sets.sort_by_key(|set| set.len()); log::debug!( "Found {} sets of variables with few possible assignments. 
Checking each set...", variable_sets.len() ); let mut progress = false; let mut unsuccessful_variable_sets = BTreeSet::new(); for mut variable_set in variable_sets { variable_set.retain(|v| { self.range_constraints .get(v) .try_to_single_value() .is_none() }); if unsuccessful_variable_sets.contains(&variable_set) { // It can happen that we process the same variable set twice because // assignments can make previously different sets equal. // We have processed this variable set before, and it did not // yield new information. // It could be that other assignments created in the meantime // lead to progress but this is rare and we will catch it in the // next loop iteration. continue; } match exhaustive_search::exhaustive_search_on_variable_set( self.constraint_system.system(), &variable_set, &*self, &self.bus_interaction_handler, ) { Ok(assignments) if assignments.is_empty() => { // No new information was found. unsuccessful_variable_sets.insert(variable_set); } Ok(assignments) => { for (var, rc) in assignments { progress |= self.apply_range_constraint_update(&var, rc); } } // Might error out if a contradiction was found. Err(e) => return Err(e), } } Ok(progress) } /// Returns a vector of expressions that are equivalent to `expression`. /// The vector is always non-empty, it returns at least `expression` itself. fn equivalent_expressions( &mut self, expression: &GroupedExpression, ) -> Vec> { if expression.is_quadratic() { // This case is too complicated. 
return vec![expression.clone()]; } if let Some(equiv) = self.equivalent_expressions_cache.get(expression) { return equiv.clone(); } // Go through the constraints related to this expression // and try to solve for the expression let mut exprs = self .constraint_system .system() .constraints_referencing_variables(expression.referenced_unknown_variables()) .filter_map(|constr| match constr { ConstraintRef::AlgebraicConstraint(constr) => Some(constr), ConstraintRef::BusInteraction(_) => None, }) .flat_map(|constr| constr.try_solve_for_expr(expression)) .collect_vec(); if exprs.is_empty() { // If we cannot solve for the expression, we just take the expression unmodified. exprs.push(expression.clone()); } self.equivalent_expressions_cache .insert(expression.clone(), exprs.clone()); exprs } fn apply_effect(&mut self, effect: Effect) -> bool { match effect { Effect::Assignment(v, expr) => { self.apply_assignment(&v, &GroupedExpression::from_runtime_constant(expr)) } Effect::RangeConstraint(v, range_constraint) => { self.apply_range_constraint_update(&v, range_constraint) } Effect::Assertion(..) => unreachable!(), // There are no known-but-not-concrete variables, so we should never // encounter a conditional assignment. Effect::ConditionalAssignment { .. } => unreachable!(), } } fn apply_range_constraint_update( &mut self, variable: &V, range_constraint: RangeConstraint, ) -> bool { if self.range_constraints.update(variable, &range_constraint) { let new_rc = self.range_constraints.get(variable); if let Some(value) = new_rc.try_to_single_value() { self.apply_assignment(variable, &GroupedExpression::from_number(value)); } else { // The range constraint was updated. 
log::trace!("({variable}: {range_constraint})"); self.constraint_system.variable_updated(variable); } true } else { false } } fn apply_assignment(&mut self, variable: &V, expr: &GroupedExpression) -> bool { log::debug!("({variable} := {expr})"); self.constraint_system.substitute_by_unknown(variable, expr); let mut vars_to_boolean_constrain = vec![]; let new_constraints = self .constraint_system .system() .constraints_referencing_variables(once(variable)) .filter_map(|constr| match constr { ConstraintRef::AlgebraicConstraint(c) => Some(c), ConstraintRef::BusInteraction(_) => None, }) .flat_map(|constr| { let result = self .boolean_extractor .try_extract_boolean(constr, &mut || self.var_dispenser.next_boolean())?; vars_to_boolean_constrain.extend(result.new_unconstrained_boolean_variable); Some(result.constraint) }) .collect_vec(); for v in vars_to_boolean_constrain { self.add_range_constraint(&v, RangeConstraint::from_mask(1)); } self.add_algebraic_constraints(new_constraints); self.assignments_to_return .push((variable.clone(), expr.clone())); true } /// Adds constraints that do not yet exist in the system. /// Returns true if at least one new constraint was added. fn add_algebraic_constraints_if_new( &mut self, constraints: impl IntoIterator>>, ) -> bool { let constraints_to_add = constraints .into_iter() .filter(|constraint_to_add| !self.contains_algebraic_constraint(constraint_to_add)) .collect_vec(); if constraints_to_add.is_empty() { false } else { self.add_algebraic_constraints(constraints_to_add); true } } /// Returns true if the system contains the given algebraic constraint. 
    fn contains_algebraic_constraint(
        &self,
        constraint: &AlgebraicConstraint>,
    ) -> bool {
        let constraint_ref = ConstraintRef::AlgebraicConstraint(constraint.as_ref());
        // Only constraints that share a variable with `constraint` can be equal to it,
        // so we query the variable index instead of scanning the whole system.
        let vars = constraint.referenced_unknown_variables();
        self.constraint_system
            .system()
            .constraints_referencing_variables(vars)
            .contains(&constraint_ref)
    }
}

/// If the constraint is equivalent to `X = Y` for some variables `X` and `Y`,
/// returns the "larger" variable and the result of solving the constraint
/// for the variable.
///
/// Note: Does not find all cases of equivalence.
fn try_to_simple_equivalence(
    constr: AlgebraicConstraint<&GroupedExpression>,
) -> Option<(V, GroupedExpression)> {
    // Only affine constraints without a constant offset can have the shape
    // `c1 * v1 + c2 * v2 = 0`.
    if !constr.expression.is_affine() {
        return None;
    }
    if !constr.expression.constant_offset().is_zero() {
        return None;
    }
    let linear = constr.expression.linear_components();
    // Exactly two linear components, otherwise this is not a simple equivalence.
    let [(v1, c1), (v2, c2)] = linear.collect_vec().try_into().ok()?;
    // We have `c1 * v1 + c2 * v2 = 0`, which is equivalent to
    // `v1 = -c2 / c1 * v2`
    if (-*c2 / *c1).is_one() {
        // The ratio is one, i.e. `v1 = v2`: substitute `v2` by `v1`.
        Some((
            v2.clone(),
            GroupedExpression::from_unknown_variable(v1.clone()),
        ))
    } else {
        None
    }
}

/// The currently known range constraints for the variables.
#[derive(Derivative)]
#[derivative(Default(bound = ""))]
pub struct RangeConstraints {
    // Variables without an entry are treated as unconstrained (see `get`).
    pub range_constraints: HashMap>,
}

impl RangeConstraintProvider for RangeConstraints {
    fn get(&self, var: &V) -> RangeConstraint {
        // Missing entries default to the fully unconstrained range constraint.
        self.range_constraints.get(var).cloned().unwrap_or_default()
    }
}

impl RangeConstraints {
    /// Adds a new range constraint for the variable.
    /// Returns `true` if the new combined constraint is tighter than the existing one.
    fn update(&mut self, variable: &V, range_constraint: &RangeConstraint) -> bool {
        let existing = self.get(variable);
        // The conjunction can only tighten the constraint, never loosen it.
        let new = existing.conjunction(range_constraint);
        if new != existing {
            self.range_constraints.insert(variable.clone(), new);
            true
        } else {
            // No new information was gained.
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::bus_interaction_handler::DefaultBusInteractionHandler;
    use expect_test::expect;

    use super::*;
    use powdr_number::GoldilocksField;

    type VarName = &'static str;
    type Var = Variable;
    type Qse = GroupedExpression;

    // Shorthand for an expression consisting of a single original variable.
    fn var(name: VarName) -> Qse {
        Qse::from_unknown_variable(Variable::Original(name))
    }

    // Shorthand for a constant expression.
    fn constant(value: u64) -> Qse {
        Qse::from_number(GoldilocksField::from(value))
    }

    #[test]
    fn expression_simplification() {
        let mut solver =
            BaseSolver::<_, _, _, VarDispenserImpl>::new(DefaultBusInteractionHandler::default());
        solver.add_algebraic_constraints(
            [
                // Boolean flags
                var("flag0") * (var("flag0") - constant(1)),
                var("flag1") * (var("flag1") - constant(1)),
                var("flag2") * (var("flag2") - constant(1)),
                // Exactly one flag is active
                var("flag0") + var("flag1") + var("flag2") - constant(1),
                // This SHOULD simplify to `v - fp - 1`, but is currently not:
                // https://github.com/powdr-labs/powdr/issues/3653
                // Note that if we remove `fp` here it works: Exhaustive search figures out
                // that v = 1 for all possible assignments of the flags.
var("v") - var("fp") - (var("flag0") + var("flag1") + var("flag2")), ] .into_iter() .map(AlgebraicConstraint::assert_zero), ); solver.solve().unwrap(); expect![[r#" (flag0) * (flag0 - 1) = 0 flag0 - lin_0 - 1 = 0 (flag0) * (lin_0) = 0 0 = 0 (flag1) * (flag1 - 1) = 0 flag1 - lin_2 - 1 = 0 (flag1) * (lin_2) = 0 0 = 0 (flag2) * (flag2 - 1) = 0 flag2 - lin_4 - 1 = 0 (flag2) * (lin_4) = 0 0 = 0 flag0 + flag1 + flag2 - 1 = 0 -(flag0 + flag1 + flag2 + fp - v) = 0"#]] .assert_eq(&solver.to_string()); } #[test] fn is_known_to_by_nonzero() { let mut solver = BaseSolver::<_, _, _, VarDispenserImpl>::new(DefaultBusInteractionHandler::default()); assert!(!solver.are_expressions_known_to_be_different(&constant(0), &constant(0))); assert!(solver.are_expressions_known_to_be_different(&constant(1), &constant(0))); assert!(solver.are_expressions_known_to_be_different(&constant(7), &constant(0))); assert!(solver.are_expressions_known_to_be_different(&-constant(1), &constant(0))); assert!( !(solver.are_expressions_known_to_be_different( &(constant(42) - constant(2) * var("a")), &constant(0) )) ); assert!( !(solver.are_expressions_known_to_be_different(&(var("a") - var("b")), &constant(0))) ); solver.add_range_constraint( &Variable::Original("a"), RangeConstraint::from_range(GoldilocksField::from(3), GoldilocksField::from(4)), ); solver.add_range_constraint( &Variable::Original("b"), RangeConstraint::from_range(GoldilocksField::from(3), GoldilocksField::from(4)), ); assert!(solver.are_expressions_known_to_be_different(&(var("a")), &constant(0))); assert!(solver.are_expressions_known_to_be_different( // If we try all possible assignments of a and b, this expression // can never be zero. &(var("a") - constant(2) * var("b")), &constant(0) )); assert!(!solver.are_expressions_known_to_be_different( // Can be zero for a = 4, b = 3. 
&(constant(3) * var("a") - constant(4) * var("b")), &constant(0) )); } } ================================================ FILE: constraint-solver/src/solver/boolean_extractor.rs ================================================ use std::{cmp::min, collections::HashMap, hash::Hash}; use derivative::Derivative; use itertools::Itertools; use powdr_number::{FieldElement, LargeInt}; use crate::{ constraint_system::AlgebraicConstraint, grouped_expression::GroupedExpression, indexed_constraint_system::apply_substitutions_to_expressions, solver::VariableAssignment, }; #[derive(Derivative)] #[derivative(Default(bound = ""))] pub struct BooleanExtractor { /// If (expr, Some(z)) is in the map, it means that /// we have transformed a constraint `left * right = 0` into /// `right + z * offset = 0`, where `z` is a new boolean variable /// and `expr = -right / offset = z`. /// /// If (expr, None) is in the map, it means that /// we have transformed a constraint `right * right = 0` into /// `right = 0`, which is a special case where we do not need /// a new boolean variable. substitutions: HashMap, Option>, } pub struct BooleanExtractionValue { pub constraint: AlgebraicConstraint>, pub new_unconstrained_boolean_variable: Option, } impl BooleanExtractor { /// Tries to simplify a quadratic constraint by transforming it into an affine /// constraint that makes use of a new boolean variable. /// NOTE: The boolean constraint is not part of the output. /// /// Returns the new constraint and the new variable if required. /// /// If the same simplification has been performed before, it will /// return None (in particular, it will not request a new variable). /// /// For example `(a + b) * (a + b + 10) = 0` can be transformed into /// `a + b + z * 10 = 0`, where `z` is a new boolean variable. /// /// @param constraint The quadratic constraint to transform. /// @param var_dispenser A function that returns a new variable that is assumed to be boolean-constrained. 
/// It will only be called if the transformation is performed. pub fn try_extract_boolean( &mut self, constraint: AlgebraicConstraint<&GroupedExpression>, mut var_dispenser: impl FnMut() -> V, ) -> Option> { let (left, right) = constraint.expression.try_as_single_product()?; // We want to check if `left` and `right` differ by a constant offset. // Since multiplying the whole constraint by a non-zero constant does // not change the constraint, we also transform `left` by a constant // (non-zero) factor. // So we are looking for an offset `c` and a non-zero constant factor `f` // such that `f * left = right + c`. // Then we can write the original constraint `left * right = 0` as // `(right + c) * right = 0` (we can just ignore `f`). // This is in turn equivalent to `right + z * c = 0`, where `z` is // a new boolean variable. // For example, if the constraint was `(2 * a + 2 * b) * (a + b + 10) = 0`, we would // set `factor = 1 / 2`, such that `left * factor - right` is a constant. // First, try to find a good factor so that `left` and `right` // likely cancel out except for a constant. As a good guess, // we try to match the coefficient of the first variable. let factor = match ( left.linear_components().next(), right.linear_components().next(), ) { (Some((left_var, left_coeff)), Some((right_var, right_coeff))) if left_var == right_var => { *right_coeff / *left_coeff } _ => T::one(), }; // `constr = 0` is equivalent to `left * right = 0` let offset = &(left.clone() * factor) - right; // We only do the transformation if `offset` is known, because // otherwise the constraint stays quadratic. let offset = *offset.try_to_known()?; // We know that `offset + right = left` and thus // `constr = 0` is equivalent to `right * (right + offset) = 0` // which is equivalent to `right + z * offset = 0` for a new // boolean variable `z`. if offset.is_zero() { // In this special case, we do not need a new variable. 
if self.substitutions.contains_key(right) { None } else { self.substitutions.insert(right.clone(), None); Some(BooleanExtractionValue { constraint: AlgebraicConstraint::assert_zero(right.clone()), new_unconstrained_boolean_variable: None, }) } } else { // We can substitute the initial constraint using a new boolean variable `z` // either by // `0 = right + z * offset` // or by // `0 = right + (1 - z) * offset = right + offset - z * offset`, // which is equivalent to // `0 = -right - offset + z * offset`. // We use the one that has a smaller constant offset in the resulting expression. let expr = [ right.clone(), -right - GroupedExpression::from_runtime_constant(offset), ] .into_iter() .min_by_key(|e| { // Return the abs of the constant offset, or None on larger fields. try_to_abs_u64(*e.constant_offset()) }) .unwrap(); let key = -&expr * (T::one() / offset); if self.substitutions.contains_key(&key) { // We have already performed this transformation before. return None; } if key.try_to_simple_unknown().is_some() { // In this case we don't gain anything because the new variable `z` will just // be equivalent to the single variable in `right`. None } else { let z = var_dispenser(); self.substitutions.insert(key, Some(z.clone())); // We return `expr + z * offset == 0`, which is equivalent to the original constraint. Some(BooleanExtractionValue { constraint: AlgebraicConstraint::assert_zero( expr + (GroupedExpression::from_unknown_variable(z.clone()) * offset), ), new_unconstrained_boolean_variable: Some(z), }) } } } } fn try_to_abs_u64(x: T) -> Option { let modulus = T::modulus().try_into_u64()?; let x = x.to_integer().try_into_u64()?; Some(min(x, modulus - x)) } impl BooleanExtractor { /// Applies the assignments to the stored substitutions. 
    pub fn apply_assignments(&mut self, assignments: &[VariableAssignment]) {
        if assignments.is_empty() {
            return;
        }
        // Rebuild the substitution map with the assignments applied to the key
        // expressions. The associated variables are untouched, so `zip_eq`
        // asserts that we got back exactly one expression per variable.
        let (exprs, vars): (Vec<_>, Vec<_>) = self.substitutions.drain().unzip();
        let exprs = apply_substitutions_to_expressions(exprs, assignments.iter().cloned());
        self.substitutions = exprs.into_iter().zip_eq(vars).collect();
    }
}

#[cfg(test)]
mod tests {
    use powdr_number::GoldilocksField;

    use super::*;

    type Var = &'static str;
    type Qse = GroupedExpression;

    // Shorthand for a single-variable expression.
    fn var(name: Var) -> Qse {
        Qse::from_unknown_variable(name)
    }

    // Shorthand for a constant expression.
    fn constant(value: u64) -> Qse {
        Qse::from_number(GoldilocksField::from(value))
    }

    #[test]
    fn test_extract_boolean() {
        let mut var_dispenser = || "z";
        let expr = (var("a") + var("b")) * (var("a") + var("b") + constant(10));
        let mut extractor: BooleanExtractor<_, _> = Default::default();
        let result = extractor
            .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser)
            .unwrap();
        assert_eq!(result.constraint.to_string(), "-(a + b + 10 * z) = 0");
        assert_eq!(result.new_unconstrained_boolean_variable, Some("z"));
    }

    #[test]
    fn test_extract_boolean_square() {
        let mut var_dispenser = || "z";
        let expr = (var("a") + var("b")) * (var("a") + var("b"));
        let mut extractor: BooleanExtractor<_, _> = Default::default();
        let result = extractor
            .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser)
            .unwrap();
        // A square `e * e = 0` degenerates to `e = 0`, no new variable needed.
        assert_eq!(result.constraint.to_string(), "a + b = 0");
        assert_eq!(result.new_unconstrained_boolean_variable, None);
    }

    #[test]
    fn test_extract_boolean_useless() {
        let mut var_dispenser = || "z";
        let expr = (var("a") - constant(1)) * (var("a"));
        let mut extractor: BooleanExtractor<_, _> = Default::default();
        let result = extractor
            .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser);
        // Extraction is refused: the new boolean would just duplicate `a`.
        assert!(result.is_none());
        let expr = (constant(2) * var("a") - constant(2)) * (constant(2) * var("a"));
        let result = extractor
            .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut
var_dispenser); assert!(result.is_none()); } #[test] fn do_not_extract_twice() { let mut var_dispenser = || "z"; let expr = (var("a") + var("b")) * (var("a") + var("b") + constant(10)); let mut extractor: BooleanExtractor<_, _> = Default::default(); let result = extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser) .unwrap(); assert_eq!(result.constraint.to_string(), "-(a + b + 10 * z) = 0"); assert_eq!(result.new_unconstrained_boolean_variable, Some("z")); assert!(extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser) .is_none()); // left and right swapped assert!(extractor .try_extract_boolean( AlgebraicConstraint::assert_zero( &(var("a") + var("b") + constant(10) * (var("a") + var("b"))) ), &mut var_dispenser ) .is_none()); let expr2 = (constant(2) * (var("a") + var("b"))) * (var("a") + var("b") + constant(10)); assert!(extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr2), &mut var_dispenser) .is_none()); let expr3 = (var("a") + var("b")) * (constant(2) * (var("a") + var("b") + constant(10))); assert!(extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr3), &mut var_dispenser) .is_none()); // This is different because the effective constant is different. 
let expr4 = (var("a") + var("b")) * (constant(2) * (var("a") + var("b") + constant(20))); assert_eq!( extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr4), &mut var_dispenser) .unwrap() .constraint .to_string(), "-(2 * a + 2 * b + 40 * z) = 0" ); } #[test] fn do_not_extract_squares_twice() { let mut var_dispenser = || "z"; let expr = (var("a") + var("b")) * (var("a") + var("b")); let mut extractor: BooleanExtractor<_, _> = Default::default(); let result = extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser) .unwrap(); assert_eq!(result.constraint.to_string(), "a + b = 0"); assert_eq!(result.new_unconstrained_boolean_variable, None); let result = extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser); assert!(result.is_none()); } #[test] fn apply_assignments() { let mut counter = 0; let vars = (0..10).map(|i| format!("z_{i}")).collect_vec(); let mut var_dispenser = || { counter += 1; vars[counter - 1].as_str() }; let expr = (var("a") + var("b") + var("k")) * (var("a") + var("b") + var("k") - constant(2)); let mut extractor: BooleanExtractor<_, _> = Default::default(); let result = extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr), &mut var_dispenser) .unwrap(); assert_eq!(result.constraint.to_string(), "-(a + b + k - 2 * z_0) = 0"); assert_eq!(result.new_unconstrained_boolean_variable, Some("z_0")); extractor.apply_assignments(&[("k", -constant(9))]); let expr2 = (var("a") + var("b") - constant(9)) * (var("a") + var("b") - constant(9) - constant(2)); let result = extractor .try_extract_boolean(AlgebraicConstraint::assert_zero(&expr2), &mut var_dispenser); assert!(result.is_none()); } } ================================================ FILE: constraint-solver/src/solver/constraint_splitter.rs ================================================ use std::{ fmt::Display, ops::{Add, Div}, }; use itertools::Itertools; use num_traits::Zero; use 
powdr_number::{FieldElement, LargeInt}; use crate::{ constraint_system::AlgebraicConstraint, grouped_expression::{GroupedExpression, RangeConstraintProvider}, range_constraint::RangeConstraint, }; /// Tries to split the given algebraic constraint into a list of equivalent /// algebraic constraints. /// This is the case for example if the variables in this expression can /// be split into different bit areas. /// /// The core idea (which is applied multiple times) is as follows: /// /// Suppose we have the constraint `x + k * y + c = 0` with `x` and `y` being /// variables (or expressions containing variables) and `k` and `c` are constants. /// Furthermore, the range constraints of `x` and `y` are such that no wrapping /// occurs in the operations, i.e. the constraint is equivalent to the same /// constraint in the natural numbers. /// /// Then the same constraint is also true modulo `k`, where we get /// `x % k + c % k = 0`. If this equation has a unique solution `s` in the range /// constraints for `x`, we get a new constraint `x - s = 0`. We can subtract /// that constraint from the original to get `k * y + c - s = 0` and iterate. pub fn try_split_constraint( constraint: &AlgebraicConstraint<&GroupedExpression>, range_constraints: &impl RangeConstraintProvider, ) -> Option>>> { let expression = constraint.expression; if expression.is_quadratic() { // We cannot split quadratic constraints. return None; } if expression .linear_components() .any(|(var, _)| range_constraints.get(var).is_unconstrained()) { // If any variable is unconstrained, we cannot split. return None; } let mut constant = *expression.constant_offset(); // Turn the linear part into components ("coefficient * expression"), // and combine components with the same coefficient, ending up with // components of the form "coefficient * (var1 + var2 - var3)". 
let mut components = group_components_by_coefficients( expression .linear_components() .map(|(var, coeff)| Component::try_from((var, coeff)).ok()) .collect::>>()?, ) .collect_vec(); if components.len() < 2 { return None; } // The original constraint is equivalent to `sum of components + constant = 0` // Now try to split out each component in turn, modifying `components` // and `constant` for every successful split. let mut extracted_parts = vec![]; for index in 0..components.len() { let candidate = &components[index]; let rest = components .iter() .enumerate() // Filter out the candidate itself and all zero components // because we set components to zero when we extract them instead // of removing them. .filter(|(i, component)| *i != index && !component.is_zero()) .map(|(_, comp)| (comp.clone() / candidate.coeff).normalize()) .collect_vec(); if rest.is_empty() { // Nothing to split, we are done. break; } // The original constraint is equivalent to // `candidate.expr + rest + constant / candidate.coeff = 0`. // The idea is to find some `k` such that the equation has the form // `expr + k * rest' + constant' = 0` and it is equivalent to // the same expression in the natural numbers. Then we apply `x -> x % k` to the whole equation // to obtain `expr % k + constant' % k = 0`. Finally, we check if it has a unique solution. // We start by finding a good `k`. It is likely wo work better if the factor exists // in all components of `rest`, so the GCD of the coefficients of the components would // be best, but we just try the smallest coefficient. let smallest_coeff_in_rest = rest.iter().map(|comp| comp.coeff).min().unwrap(); assert_ne!(smallest_coeff_in_rest, 0.into()); assert!(smallest_coeff_in_rest.is_in_lower_half()); // Try to find the unique value for `candidate.expr` in this equation. 
if let Some(solution) = find_solution( &candidate.expr, smallest_coeff_in_rest, rest.into_iter() .map(|comp| GroupedExpression::from(comp / smallest_coeff_in_rest)) .sum(), constant / candidate.coeff, range_constraints, ) { // We now know that `candidate.expr = solution`, so we add it to the extracted parts. extracted_parts.push(AlgebraicConstraint::assert_eq( candidate.expr.clone(), GroupedExpression::from_number(solution), )); // We remove the candidate (`candidate.coeff * candidate.expr`) from the expression. // To balance this out, we add `candidate.coeff * candidate.expr = candidate.coeff * solution` // to the constant. constant += solution * candidate.coeff; components[index] = Zero::zero(); } } if extracted_parts.is_empty() { None } else { // We found some independent parts, add the remaining components to the parts // and return them. extracted_parts.push(recombine_components(components, constant)); Some(extracted_parts) } } /// Groups a sequence of components (thought of as a sum) by coefficients /// so that its sum does not change. /// Before grouping, the components are normalized such that the coefficient is always /// in the lower half of the field (and the expression might be negated to compensate). /// The list is sorted by the coefficient. fn group_components_by_coefficients( components: impl IntoIterator>, ) -> impl Iterator> { components .into_iter() .map(|c| c.normalize()) .into_grouping_map_by(|c| c.coeff) .sum() .into_iter() .filter(|(_, expr)| !expr.is_zero()) .map(|(_, comp)| comp) .sorted_by_key(|comp| comp.coeff.to_integer()) } /// If this returns `Some(x)`, then `x` is the only valid value for `expr` in the equation /// `expr + coefficient * rest + constant = 0`. /// It does not make assumptions about its inputs. /// We try to translate the equation to an equation in the natural numbers /// and try to find a unique solution. 
fn find_solution(
    expr: &GroupedExpression,
    coefficient: T,
    rest: GroupedExpression,
    constant: T,
    range_constraints: &impl RangeConstraintProvider,
) -> Option {
    let expr_rc = expr.range_constraint(range_constraints);
    let rest_rc = rest.range_constraint(range_constraints);
    let unconstrained_range_width = RangeConstraint::::unconstrained().range_width();
    if expr_rc.range_width() == unconstrained_range_width
        || rest_rc.range_width() == unconstrained_range_width
    {
        // We probably cannot translate this into the natural numbers.
        return None;
    }
    // Both range constraints have a "gap". We shift the gap such that the
    // lower bounds for both `expr` and `rest` are zero.
    if expr_rc.range().0 != 0.into() {
        // Recurse on `expr - shift` (whose range starts at zero) and add the
        // shift back onto the solution before returning.
        let shift = expr_rc.range().0;
        return find_solution(
            &(expr - &GroupedExpression::from_number(shift)),
            coefficient,
            rest,
            constant + shift,
            range_constraints,
        )
        .map(|s| s + shift);
    } else if rest_rc.range().0 != 0.into() {
        // Same shift for `rest`. Here the solution for `expr` is unaffected,
        // only the constant changes (by `coefficient * shift`).
        return find_solution(
            expr,
            coefficient,
            rest - GroupedExpression::from_number(rest_rc.range().0),
            constant + coefficient * rest_rc.range().0,
            range_constraints,
        );
    }
    // rc(expr): [0, max_expr]
    // rc(rest): [0, max_rest]
    // If max_expr + k * max_rest < P, then we can translate the equation to the natural numbers:
    // expr + k * rest = (-constant) % modulus
    let max_expr = expr_rc.range().1;
    let max_rest = rest_rc.range().1;
    // Evaluate `expr + coefficient * rest` for the largest possible value
    // and see if it wraps around in the field.
    // (Done in arbitrary-precision integers so the check itself cannot wrap.)
    if max_expr.to_arbitrary_integer()
        + coefficient.to_arbitrary_integer() * max_rest.to_arbitrary_integer()
        >= T::modulus().to_arbitrary_integer()
    {
        return None;
    }
    // It does not wrap around, so we know that the equation can be translated to the
    // natural numbers:
    // expr + coefficient * rest = (-constant) % modulus
    // Next, we apply `x -> x % coefficient` to both sides of the equation to get
    // expr % coefficient = ((-constant) % modulus) % coefficient
    // Note that at this point, we only get an implication, not an equivalence,
    // but if the range constraints of `expr` only allow a unique solution,
    // it holds unconditionally.
    if max_expr.to_integer() >= coefficient.to_integer() + coefficient.to_integer() {
        // If `max_expr >= 2 * coefficient`, both `rhs` and `rhs + coefficient`
        // (see below) always fit into `[0, max_expr]`, so there are always at
        // least two solutions (ignoring masks and other constraints).
        return None;
    }
    // TODO this only works for fields that fit 64 bits, but that is probably fine for now.
    let rhs = T::from(
        (-constant).to_integer().try_into_u64().unwrap()
            % coefficient.to_integer().try_into_u64().unwrap(),
    );
    // Now we try `rhs`, `rhs + coefficient`, `rhs + 2 * coefficient`, ...
    // But because of the check above, we can stop at `2 * coefficient`.
    // `exactly_one` yields `None` unless precisely one candidate survives the
    // range-constraint filter — that is what makes the solution unique.
    (0..=1)
        .map(|i| rhs + T::from(i) * coefficient)
        .filter(|candidate| expr_rc.allows_value(*candidate))
        .exactly_one()
        .ok()
}

/// Turns the remaining components and constant into a single constraint,
/// i.e. returns an algebraic constraint that is equivalent to
/// `sum of components + constant = 0`.
fn recombine_components( components: Vec>, constant: T, ) -> AlgebraicConstraint> { let remaining = components .into_iter() .filter(|comp| !comp.is_zero()) .collect_vec(); AlgebraicConstraint::assert_zero(match remaining.as_slice() { [Component { coeff, expr }] => { // if there is only one component, we normalize expr + &GroupedExpression::from_number(constant / *coeff) } _ => { remaining .into_iter() .map(|comp| comp.into()) .sum::>() + GroupedExpression::from_number(constant) } }) } /// A component of a constraint. Equivalent to the expression `coeff * expr`. #[derive(Clone)] struct Component { coeff: T, expr: GroupedExpression, } impl Display for Component { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{} * ({})", self.coeff, self.expr) } } impl<'a, T: FieldElement, V: Ord + Clone + Eq> TryFrom<(&'a V, &'a T)> for Component { type Error = (); fn try_from((var, coeff): (&'a V, &'a T)) -> Result { let coeff = *coeff; let expr = GroupedExpression::from_unknown_variable(var.clone()); Ok(Self { coeff, expr }) } } impl Component { /// Normalize the component such that the coefficient is positive. 
fn normalize(self) -> Self { if self.coeff.is_in_lower_half() { self } else { Self { coeff: -self.coeff, expr: -self.expr, } } } } impl Add for Component { type Output = Self; fn add(self, other: Self) -> Self { assert!(self.coeff == other.coeff); Self { coeff: self.coeff, expr: self.expr + other.expr, } } } impl Div for Component { type Output = Self; fn div(self, rhs: T) -> Self { assert!(!rhs.is_zero()); Self { coeff: self.coeff / rhs, expr: self.expr, } } } impl From> for GroupedExpression { fn from(comp: Component) -> Self { comp.expr * comp.coeff } } impl Zero for Component { fn zero() -> Self { Self { coeff: T::zero(), expr: GroupedExpression::zero(), } } fn is_zero(&self) -> bool { self.coeff.is_zero() || self.expr.is_zero() } } #[cfg(test)] mod test { use std::collections::HashMap; use expect_test::expect; use itertools::Itertools; use powdr_number::{BabyBearField, GoldilocksField}; use super::*; use crate::range_constraint::RangeConstraint; type Var = &'static str; type Qse = GroupedExpression; fn var(name: Var) -> Qse { Qse::from_unknown_variable(name) } fn constant(value: u64) -> Qse { Qse::from_number(GoldilocksField::from(value)) } fn try_split( expr: GroupedExpression, rcs: &impl RangeConstraintProvider, ) -> Option>>> { try_split_constraint(&AlgebraicConstraint::assert_zero(&expr), rcs) } #[test] fn split_simple() { let four_bit_rc = RangeConstraint::from_mask(0xfu32); let rcs = [ ("x", four_bit_rc), ("y", four_bit_rc), ("a", four_bit_rc), ("b", four_bit_rc), ] .into_iter() .collect::>(); let expr = var("x") + var("y") * constant(255) - var("a") + var("b") * constant(255); let items = try_split(expr, &rcs).unwrap().iter().join(", "); expect!["-(a - x) = 0, b + y = 0"].assert_eq(&items); } #[test] fn split_multiple() { let four_bit_rc = RangeConstraint::from_mask(0xfu32); let rcs = [ ("x", four_bit_rc), ("y", four_bit_rc), ("a", four_bit_rc), ("b", four_bit_rc), ("r", four_bit_rc), ("s", four_bit_rc), ("w", four_bit_rc), ] .into_iter() .collect::>(); 
let expr = var("x") + var("y") * constant(50) - var("a") + var("b") * constant(50) - var("r") * constant(6000) + var("s") * constant(6000) + var("w") * constant(1200000); let items = try_split(expr, &rcs).unwrap().iter().join("\n"); assert_eq!( items, "-(a - x) = 0 b + y = 0 -(r - s) = 0 w = 0" ); } #[test] fn split_seqz() { // From the seqz instruction: // (b__3_0 - b_msb_f_0) * (b_msb_f_0 + 256 - b__3_0) = 0 // After boolean extraction: // b__3_0 - b_msb_f_0 + 256 * x = 0; // or: // b__3_0 - b_msb_f_0 + 256 * (1 - x) = 0; let byte_rc = RangeConstraint::from_mask(0xffu32); let bit_rc = RangeConstraint::from_mask(0x1u32); let rcs = [("b__3_0", byte_rc), ("b_msb_f_0", byte_rc), ("x", bit_rc)] .into_iter() .collect::>(); let expr1 = var("b__3_0") - var("b_msb_f_0") + constant(256) * var("x"); let items = try_split(expr1, &rcs).unwrap().iter().join("\n"); assert_eq!( items, "b__3_0 - b_msb_f_0 = 0 x = 0" ); let expr2 = var("b__3_0") - var("b_msb_f_0") + constant(256) * (var("x") - constant(1)); let items = try_split(expr2, &rcs).unwrap().iter().join("\n"); assert_eq!( items, "b__3_0 - b_msb_f_0 = 0 x - 1 = 0" ); } #[test] fn split_multiple_with_const() { let four_bit_rc = RangeConstraint::from_mask(0xfu32); let rcs = [ ("x", four_bit_rc), ("y", four_bit_rc), ("a", four_bit_rc), ("b", four_bit_rc), ("r", four_bit_rc), ("s", four_bit_rc), ("w", four_bit_rc), ] .into_iter() .collect::>(); let expr = var("x") + var("y") * constant(64) - var("a") - var("b") * constant(64) - var("r") * constant(65536) + var("s") * constant(65536) + var("w") * constant(0x1000000) - constant(5 * 0x1000000 - 6 + 64 - 5 * 65536); let items = try_split(expr, &rcs).unwrap().iter().join("\n"); assert_eq!( items, "-(a - x - 6) = 0 -(b - y + 1) = 0 -(r - s - 5) = 0 w - 5 = 0" ); } #[test] fn split_limb_decomposition() { let four_bit_rc = RangeConstraint::from_mask(0xfu32); let rcs = [ ("l0", four_bit_rc), ("l1", four_bit_rc), ("l2", four_bit_rc), ("l3", four_bit_rc), ] .into_iter() .collect::>(); 
let expr = var("l0") + var("l1") * constant(0x10) + var("l2") * constant(0x100) + var("l3") * constant(0x1000) - constant(0x1234); let items = try_split(expr, &rcs).unwrap().iter().join("\n"); assert_eq!( items, "l0 - 4 = 0 l1 - 3 = 0 l2 - 2 = 0 l3 - 1 = 0" ); } #[test] fn negated_and_unnegated() { // 7864320 * a__0_12 - bool_113 + 314572801 // a__0_12 + 256 * bool_113 - 216 let byte_rc = RangeConstraint::from_mask(0xffu32); let bit_rc = RangeConstraint::from_mask(0x1u32); let rcs = [("a__0_12", byte_rc), ("bool_113", bit_rc)] .into_iter() .collect::>(); let expr1: GroupedExpression = -(GroupedExpression::from_unknown_variable("a__0_12") * GroupedExpression::from_number(BabyBearField::from(7864320)) - GroupedExpression::from_unknown_variable("bool_113") + GroupedExpression::from_number(BabyBearField::from(314572801))); // Split `expr1` and `-expr1`, the result should be equivalent. let first = try_split(expr1.clone(), &rcs) .unwrap() .into_iter() .join(", "); expect!["bool_113 = 0, -(a__0_12 - 216) = 0"].assert_eq(&first); let expr2 = -expr1; let second = try_split(expr2, &rcs).unwrap().into_iter().join(", "); expect!["-(bool_113) = 0, a__0_12 - 216 = 0"].assert_eq(&second); } #[test] fn wrapping_1() { // -(c__1_3) + 256 * (30720 * c__0_3 - c__2_3) = 1226833928 let byte_rc = RangeConstraint::from_mask(0xffu32); let rcs = [ ("c__0_3", byte_rc), ("c__1_3", byte_rc), ("c__2_3", byte_rc), ] .into_iter() .collect::>(); let expr: GroupedExpression = -GroupedExpression::from_unknown_variable("c__1_3") + GroupedExpression::from_number(BabyBearField::from(256)) * (GroupedExpression::from_number(BabyBearField::from(30720)) * GroupedExpression::from_unknown_variable("c__0_3") - GroupedExpression::from_unknown_variable("c__2_3")) - GroupedExpression::from_number(BabyBearField::from(1226833928)); let result = try_split(expr.clone(), &rcs).unwrap().iter().join(", "); expect!["-(c__1_3 - 248) = 0, c__0_3 - 157 = 0, -(c__2_3 - 30719) = 0"].assert_eq(&result); let mut expr = expr; 
expr.substitute_by_known(&"c__0_3", &BabyBearField::from(157)); expr.substitute_by_known(&"c__1_3", &BabyBearField::from(248)); expr.substitute_by_known(&"c__2_3", &BabyBearField::from(30719)); assert!(expr.is_zero()); } #[test] fn wrapping_2() { // bool_17 + 1069547521 * (a__0_0) = 943718400 let bit_rc = RangeConstraint::from_mask(0x1u32); let rcs = [("bool_17", bit_rc), ("a__0_0", bit_rc)] .into_iter() .collect::>(); let expr: GroupedExpression = GroupedExpression::from_unknown_variable("bool_17") + GroupedExpression::from_number(BabyBearField::from(1069547521)) * GroupedExpression::from_unknown_variable("a__0_0") - GroupedExpression::from_number(BabyBearField::from(943718400)); let result = try_split(expr.clone(), &rcs).unwrap().iter().join(", "); expect!["bool_17 = 0, -(a__0_0 + 1) = 0"].assert_eq(&result); } #[test] fn split_at_boundary() { let bit_rc = RangeConstraint::from_mask(0x1u32); let limb_rc = RangeConstraint::from_mask(0x7fffu32); let rcs = [ ("bool_103", bit_rc), ("to_pc_least_sig_bit_4", bit_rc), ("to_pc_limbs__0_4", limb_rc), ] .into_iter() .collect::>(); let expr: GroupedExpression = GroupedExpression::from_unknown_variable("bool_103") + GroupedExpression::from_number(BabyBearField::from(30720)) * (GroupedExpression::from_unknown_variable("to_pc_least_sig_bit_4") + GroupedExpression::from_number(BabyBearField::from(2)) * GroupedExpression::from_unknown_variable("to_pc_limbs__0_4")) - GroupedExpression::from_number(BabyBearField::from(30720 * 123 + 1)); let items = try_split(expr, &rcs).unwrap().iter().join(", "); assert_eq!( items, "bool_103 - 1 = 0, to_pc_least_sig_bit_4 - 1 = 0, to_pc_limbs__0_4 - 61 = 0" ); } #[test] fn bit_decomposition_bug() { // This tests against a bug that was present in the old bit // decomposition algorithm. 
let lin = var("lin"); let result = var("result"); let constr = lin.clone() - constant(4) * result.clone() - constant(4); let range_constraints = HashMap::from([ ("lin", RangeConstraint::from_mask(0x8u32)), ("result", RangeConstraint::from_mask(0x1u32)), ]); // We try to solve `lin - 4 * result = 4` and the problem is // that we cannot assign `lin = 4 & mask` for some mask, since // it needs to be assigned `8`. assert!(try_split(constr, &range_constraints).is_none()); } #[test] fn split_fail_overlapping() { let four_bit_rc = RangeConstraint::from_mask(0xfu32); let rcs = [("x", four_bit_rc), ("y", four_bit_rc)] .into_iter() .collect::>(); // The RC of x is not tight enough let expr = var("x") + var("y") * constant(2); assert!(try_split(expr, &rcs).is_none()); } #[test] fn split_fail_not_unique() { let four_bit_rc = RangeConstraint::from_mask(0xfu32); let rcs = [("x", four_bit_rc), ("y", four_bit_rc), ("z", four_bit_rc)] .into_iter() .collect::>(); // There are multiple ways to solve the modulo equation. let expr = (var("x") - var("y")) + constant(16) * var("z") - constant(1); assert!(try_split(expr, &rcs).is_none()); // If we adjust the constant, it works. 
let expr = (var("x") - var("y")) + constant(16) * var("z") - constant(0); let result = try_split(expr.clone(), &rcs).unwrap().iter().join(", "); expect!["x - y = 0, z = 0"].assert_eq(&result); } } ================================================ FILE: constraint-solver/src/solver/exhaustive_search.rs ================================================ use itertools::Itertools; use powdr_number::FieldElement; use powdr_number::LargeInt; use crate::constraint_system::BusInteractionHandler; use crate::constraint_system::ConstraintRef; use crate::effect::Effect; use crate::grouped_expression::RangeConstraintProvider; use crate::indexed_constraint_system::IndexedConstraintSystem; use crate::range_constraint::RangeConstraint; use crate::utils::{get_all_possible_assignments, has_few_possible_assignments}; use std::collections::btree_map::Entry; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Display; use std::hash::Hash; use super::Error; /// The maximum number of possible assignments to try when doing exhaustive search. const MAX_SEARCH_WIDTH: u64 = 1 << 10; /// The maximum range width of a variable to be considered for exhaustive search. const MAX_VAR_RANGE_WIDTH: u64 = 5; /// Goes through all possible assignments for the given variables and tries no deduce /// new range constraints (on any variable) for each of the assignments. Returns the union of the obtained /// range constraints over all assignments. /// Can also return range constraints for the input variables if some of them lead /// to a contradiction. /// Returns an error if all assignments are contradictory. 
pub fn exhaustive_search_on_variable_set(
    constraint_system: &IndexedConstraintSystem,
    variables: &BTreeSet,
    range_constraints: impl RangeConstraintProvider + Clone,
    bus_interaction_handler: &impl BusInteractionHandler,
) -> Result>, Error> {
    // Lazily enumerate every assignment to `variables` that the current range
    // constraints allow, and derive new range constraints for each of them.
    // Assignments that directly contradict the system are dropped via `.ok()`.
    let mut new_constraints =
        get_all_possible_assignments(variables.iter().cloned(), &range_constraints).filter_map(
            |assignments| {
                derive_new_range_constraints(
                    constraint_system,
                    assignments,
                    &range_constraints,
                    bus_interaction_handler,
                )
                .ok()
            },
        );
    let Some(first_assignment_constraints) = new_constraints.next() else {
        // No assignment satisfied the constraint system.
        return Err(Error::ExhaustiveSearchError);
    };
    // Compute the disjunction of the effects of each assignment.
    let result = new_constraints.try_fold(first_assignment_constraints, |mut acc, new_constr| {
        for (var, rc) in &mut acc {
            // A variable missing from `new_constr` is treated as unconstrained.
            let other_rc = new_constr.get(var).cloned().unwrap_or_default();
            *rc = rc.disjunction(&other_rc)
        }
        // Remove the constraints that are not better than the ones we already know.
        acc.retain(|v, rc| range_constraints.get(v) != *rc);
        if acc.is_empty() {
            // Exiting early here is crucial for performance.
            // This is not an error though, it only means we could not find an improvement.
            return Err(());
        }
        Ok(acc)
    });
    match result {
        Ok(assignments) => Ok(assignments),
        // `Err(())` is the early exit above, not a failure: report "no new
        // constraints found" with an empty map.
        Err(_) => Ok(Default::default()),
    }
}

/// Returns all unique sets of variables that appear together in an identity
/// (either in an algebraic constraint or in the same field of a bus interaction),
/// IF the number of possible assignments is less than `MAX_SEARCH_WIDTH`.
pub fn get_brute_force_candidates<'a, T: FieldElement, V: Clone + Hash + Ord>(
    constraint_system: &'a IndexedConstraintSystem,
    rc: impl RangeConstraintProvider + Clone + 'a,
) -> impl Iterator> + 'a {
    constraint_system
        .algebraic_constraints()
        .iter()
        .map(|c| &c.expression)
        .chain(
            constraint_system
                .bus_interactions()
                .iter()
                .flat_map(|b| b.fields()),
        )
        .map(|expression| {
            // The set of unknown variables of one identity / bus interaction field.
            expression
                .referenced_unknown_variables()
                .cloned()
                .collect::>()
        })
        .unique()
        .filter_map(move |variables| {
            match is_candidate_for_exhaustive_search(&variables, &rc) {
                true => Some(variables),
                false => {
                    // It could be that only one variable has a large range, but that the rest uniquely determine it.
                    // In that case, searching through all combinations of the other variables would be enough.
                    // Check if removing the variable results in a small enough set of possible assignments.
                    // NOTE(review): this branch looks only reachable for non-empty
                    // sets (an empty set should pass the candidate check), so the
                    // `- 1` below should not underflow — TODO confirm against
                    // `has_few_possible_assignments`.
                    let num_variables = variables.len();
                    let variables_without_largest_range = variables
                        .into_iter()
                        // Sort ascending by range size and drop the last (largest-range) variable.
                        .sorted_by(|a, b| rc.get(a).size_estimate().cmp(&rc.get(b).size_estimate()))
                        .take(num_variables - 1)
                        .collect::>();
                    is_candidate_for_exhaustive_search(&variables_without_largest_range, &rc)
                        .then_some(variables_without_largest_range)
                }
            }
        })
        .filter(|variables| !variables.is_empty())
        .unique()
}

/// Returns true if the set of variables is suitable for exhaustive search:
/// the total number of possible assignments is at most `MAX_SEARCH_WIDTH` and
/// no single variable has a range wider than `MAX_VAR_RANGE_WIDTH`.
fn is_candidate_for_exhaustive_search(
    variables: &BTreeSet,
    rc: &impl RangeConstraintProvider,
) -> bool {
    has_few_possible_assignments(variables.iter().cloned(), rc, MAX_SEARCH_WIDTH)
        && has_small_max_range_constraint_size(variables.iter().cloned(), rc, MAX_VAR_RANGE_WIDTH)
}

/// Returns true if every variable's range-constraint size estimate fits into a
/// `u64` and is at most `threshold`.
fn has_small_max_range_constraint_size(
    mut variables: impl Iterator,
    rc: &impl RangeConstraintProvider,
    threshold: u64,
) -> bool {
    variables.all(|v| {
        if let Some(size) = rc.get(&v).size_estimate().try_into_u64() {
            size <= threshold
        } else {
            // The size does not even fit into a u64, so it is certainly too large.
            false
        }
    })
}

/// The provided assignments lead to a contradiction in the constraint system.
struct ContradictingConstraintError;

/// Given a list of assignments of concrete values to variables, tries to derive
/// new range constraints from them. To keep this function relatively fast, it
/// only considers each algebraic or bus constraint in isolation.
/// Fails if any of the assignments *directly* contradicts any of the constraints.
/// Note that getting an Ok(_) here does not mean that there is no contradiction, as
/// this function only does one step of the derivation.
fn derive_new_range_constraints(
    constraint_system: &IndexedConstraintSystem,
    assignments: BTreeMap,
    range_constraints: &impl RangeConstraintProvider,
    bus_interaction_handler: &impl BusInteractionHandler,
) -> Result>, ContradictingConstraintError> {
    let effects = constraint_system
        .constraints_referencing_variables(assignments.keys())
        .map(|constraint| match constraint {
            ConstraintRef::AlgebraicConstraint(identity) => {
                // Substitute the assignments into a copy of the constraint and
                // let the solver derive effects from the specialized constraint.
                let mut identity = identity.cloned();
                for (variable, value) in assignments.iter() {
                    identity.substitute_by_known(variable, value);
                }
                identity
                    .as_ref()
                    .solve(range_constraints)
                    .map(|result| result.effects)
                    .map_err(|_| ContradictingConstraintError)
            }
            ConstraintRef::BusInteraction(bus_interaction) => {
                // Same for bus interactions, substituting into every field.
                let mut bus_interaction = bus_interaction.clone();
                for (variable, value) in assignments.iter() {
                    bus_interaction
                        .fields_mut()
                        .for_each(|expr| expr.substitute_by_known(variable, value))
                }
                bus_interaction
                    .solve(bus_interaction_handler, range_constraints)
                    .map_err(|_| ContradictingConstraintError)
            }
        })
        // Early return if any constraint leads to a contradiction.
        .collect::, _>>()?;
    effects
        .into_iter()
        .flatten()
        .filter_map(|effect| match effect {
            Effect::Assignment(variable, value) => {
                // Turn assignment into range constraint, we can recover it later.
                Some((variable, RangeConstraint::from_value(value)))
            }
            Effect::RangeConstraint(variable, rc) => Some((variable, rc)),
            // Other effect kinds carry no range information we can use here.
            _ => None,
        })
        .chain(
            assignments
                .into_iter()
                .map(|(v, val)| (v, RangeConstraint::from_value(val))),
        )
        // All range constraints in this iterator hold simultaneously,
        // so we compute the intersection for each variable.
        .try_fold(BTreeMap::new(), |mut map, (variable, rc)| {
            match map.entry(variable.clone()) {
                Entry::Vacant(entry) => {
                    entry.insert(rc);
                }
                Entry::Occupied(mut entry) => {
                    let existing = entry.get();
                    if existing.is_disjoint(&rc) {
                        // Two simultaneous constraints on the same variable
                        // exclude each other: contradiction.
                        return Err(ContradictingConstraintError);
                    }
                    entry.insert(existing.conjunction(&rc));
                }
            }
            Ok(map)
        })
}

================================================
FILE: constraint-solver/src/solver/linearizer.rs
================================================
use std::collections::HashMap;
use std::hash::Hash;

use derivative::Derivative;
use itertools::Itertools;
use powdr_number::FieldElement;

use crate::constraint_system::AlgebraicConstraint;
use crate::grouped_expression::GroupedExpression;
use crate::grouped_expression::GroupedExpressionComponent;
use crate::indexed_constraint_system::apply_substitutions_to_expressions;
use crate::solver::VariableAssignment;

/// Solver component that substitutes non-affine sub-expressions
/// by new variables (or constants if those variables have been determined
/// later on to have a constant value).
#[derive(Derivative)]
#[derivative(Default(bound = ""))]
pub struct Linearizer {
    substitutions: HashMap, GroupedExpression>,
}

impl Linearizer {
    /// Linearizes the expression by introducing new variables for
    /// non-affine parts. The new constraints are appended to
    /// `constraint_collection` and must be added to the system.
    /// The linearized expression is returned.
pub fn linearize_expression( &mut self, expr: GroupedExpression, var_dispenser: &mut impl FnMut() -> V, constraint_collection: &mut impl Extend>>, ) -> GroupedExpression { if expr.is_affine() { return expr; } expr.into_summands() .map(|c| match c { GroupedExpressionComponent::Quadratic(l, r) => { let l = self.linearize_and_substitute_by_var( l, var_dispenser, constraint_collection, ); let r = self.linearize_and_substitute_by_var( r, var_dispenser, constraint_collection, ); self.substitute_by_var(l * r, var_dispenser, constraint_collection) } GroupedExpressionComponent::Linear(v, coeff) => { GroupedExpression::from_unknown_variable(v) * coeff } GroupedExpressionComponent::Constant(c) => { GroupedExpression::from_runtime_constant(c) } }) .sum() } /// Tries to linearize the expression according to already existing substitutions. pub fn try_linearize_existing( &self, expr: GroupedExpression, ) -> Option> { if expr.is_affine() { return Some(expr); } Some( expr.into_summands() .map(|c| match c { GroupedExpressionComponent::Quadratic(l, r) => { let l = self.try_substitute_by_existing_var(&self.try_linearize_existing(l)?)?; let r = self.try_substitute_by_existing_var(&self.try_linearize_existing(r)?)?; self.try_substitute_by_existing_var(&(l * r)) } GroupedExpressionComponent::Linear(v, coeff) => { Some(GroupedExpression::from_unknown_variable(v) * coeff) } GroupedExpressionComponent::Constant(c) => { Some(GroupedExpression::from_runtime_constant(c)) } }) .collect::>>()? .into_iter() .sum(), ) } /// Linearizes the expression and substitutes the expression by a single variable. /// The substitution is not performed if the expression is a constant or a single /// variable (without coefficient). 
fn linearize_and_substitute_by_var( &mut self, expr: GroupedExpression, var_dispenser: &mut impl FnMut() -> V, constraint_collection: &mut impl Extend>>, ) -> GroupedExpression { let linearized = self.linearize_expression(expr, var_dispenser, constraint_collection); self.substitute_by_var(linearized, var_dispenser, constraint_collection) } /// Substitutes the given expression by a single variable using the variable dispenser, /// unless the expression is already just a single variable or constant. Re-uses substitutions /// that were made in the past. /// Adds the equality constraint to `constraint_collection` and returns the variable /// as an expression. pub fn substitute_by_var( &mut self, expr: GroupedExpression, var_dispenser: &mut impl FnMut() -> V, constraint_collection: &mut impl Extend>>, ) -> GroupedExpression { if let Some(var) = self.try_substitute_by_existing_var(&expr) { var } else { let var = var_dispenser(); self.substitutions.insert( expr.clone(), GroupedExpression::from_unknown_variable(var.clone()), ); let var = GroupedExpression::from_unknown_variable(var); constraint_collection.extend([AlgebraicConstraint::assert_zero(expr - var.clone())]); var } } /// Tries to substitute the given expression by an existing variable. pub fn try_substitute_by_existing_var( &self, expr: &GroupedExpression, ) -> Option> { if expr.try_to_known().is_some() || expr.try_to_simple_unknown().is_some() { Some(expr.clone()) } else { self.substitutions.get(expr).cloned() } } /// Returns an iterator over expressions equivalent to `expr` with the idea that /// they might allow to answer a query better or worse. /// It usually returns the original expression, a single variable that it was /// substituted into during a previous linearization and a previously linearized version. 
pub fn internalized_versions_of_expression( &self, expr: &GroupedExpression, ) -> impl Iterator> + Clone { let direct = expr.clone(); // See if we have a direct substitution for the expression by a variable. let simple_substituted = self.try_substitute_by_existing_var(expr); // Try to re-do the linearization let substituted = self.try_linearize_existing(expr.clone()); std::iter::once(direct) .chain(simple_substituted) .chain(substituted) } } impl Linearizer { /// Applies the assignments to the stored substitutions. pub fn apply_assignments(&mut self, assignments: &[VariableAssignment]) { if assignments.is_empty() { return; } let (exprs, vars): (Vec<_>, Vec<_>) = self.substitutions.drain().unzip(); let exprs = apply_substitutions_to_expressions(exprs, assignments.iter().cloned()); let vars = apply_substitutions_to_expressions(vars, assignments.iter().cloned()); self.substitutions = exprs.into_iter().zip_eq(vars).collect(); } } #[cfg(test)] mod tests { use expect_test::expect; use powdr_number::GoldilocksField; use super::*; use crate::{ bus_interaction_handler::DefaultBusInteractionHandler, constraint_system::BusInteraction, solver::{ base::{BaseSolver, VarDispenserImpl}, var_transformation::Variable, Solver, }, }; type Qse = GroupedExpression>; fn var(name: &'static str) -> Qse { GroupedExpression::from_unknown_variable(Variable::from(name)) } fn constant(value: u64) -> Qse { GroupedExpression::from_number(GoldilocksField::from(value)) } #[test] fn linearization() { let mut var_counter = 0usize; let mut linearizer = Linearizer::default(); let expr = var("x") + var("y") * (var("z") + constant(1)) * (var("x") - constant(1)); let mut constraints_to_add = vec![]; let linearized = linearizer.linearize_expression( expr, &mut || { let var = Variable::Linearized(var_counter); var_counter += 1; var }, &mut constraints_to_add, ); assert_eq!(linearized.to_string(), "x + lin_3"); assert_eq!( constraints_to_add.into_iter().format("\n").to_string(), "z - lin_0 + 1 = 0\n(y) * 
(lin_0) - lin_1 = 0\nx - lin_2 - 1 = 0\n(lin_1) * (lin_2) - lin_3 = 0" ); } #[test] fn solver_transforms() { let mut solver = BaseSolver::<_, _, _, VarDispenserImpl>::new(DefaultBusInteractionHandler::default()); solver.add_algebraic_constraints( [ (var("x") + var("y")) * (var("z") + constant(1)) * (var("x") - constant(1)), (var("a") + var("b")) * (var("c") - constant(2)), ] .into_iter() .map(AlgebraicConstraint::assert_zero), ); solver.add_bus_interactions(vec![BusInteraction { bus_id: constant(1), payload: vec![var("x") + var("y"), -var("a"), var("a")], multiplicity: var("z") + constant(1), }]); // Below, it is important that in the bus interaction, // `a` is not replaced and that the first payload re-uses the // already linearized `x + y`. expect!([r#" ((x + y) * (z + 1)) * (x - 1) = 0 x + y - lin_0 = 0 z - lin_1 + 1 = 0 (lin_0) * (lin_1) - lin_2 = 0 x - lin_3 - 1 = 0 (lin_2) * (lin_3) - lin_4 = 0 lin_4 = 0 (a + b) * (c - 2) = 0 a + b - lin_5 = 0 c - lin_6 - 2 = 0 (lin_5) * (lin_6) - lin_7 = 0 lin_7 = 0 -(a + lin_8) = 0 BusInteraction { bus_id: 1, multiplicity: lin_1, payload: lin_0, lin_8, a }"#]) .assert_eq(&solver.to_string()); let assignments = solver.solve().unwrap(); expect!([r#" lin_4 = 0 lin_7 = 0"#]) .assert_eq( &assignments .iter() .map(|(var, value)| format!("{var} = {value}")) .join("\n"), ); expect!([r#" ((x + y) * (z + 1)) * (x - 1) = 0 x + y - lin_0 = 0 z - lin_1 + 1 = 0 (lin_0) * (lin_1) - lin_2 = 0 x - lin_3 - 1 = 0 (lin_2) * (lin_3) = 0 0 = 0 (a + b) * (c - 2) = 0 a + b - lin_5 = 0 c - lin_6 - 2 = 0 (lin_5) * (lin_6) = 0 0 = 0 -(a + lin_8) = 0 BusInteraction { bus_id: 1, multiplicity: lin_1, payload: lin_0, lin_8, a }"#]) .assert_eq(&solver.to_string()); } } ================================================ FILE: constraint-solver/src/solver/var_transformation.rs ================================================ use powdr_number::FieldElement; use crate::constraint_system::{AlgebraicConstraint, BusInteraction}; use 
crate::grouped_expression::{GroupedExpression, RangeConstraintProvider}; use crate::range_constraint::RangeConstraint; use crate::runtime_constant::VarTransformable; use crate::solver::{Error, Solver, VariableAssignment}; use std::collections::HashSet; use std::fmt::{Debug, Display}; use std::hash::Hash; /// We introduce new variables. /// This enum avoids clashes with the original variables. #[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)] pub enum Variable { /// A regular variable that also exists in the original system. Original(V), /// A new boolean-constrained variable that was introduced by the solver. Boolean(usize), /// A new variable introduced by the linearizer. Linearized(usize), } impl From for Variable { /// Converts a regular variable to a `Variable`. fn from(v: V) -> Self { Variable::Original(v) } } impl From<&V> for Variable { /// Converts a regular variable to a `Variable`. fn from(v: &V) -> Self { Variable::Original(v.clone()) } } impl Variable { pub fn try_to_original(&self) -> Option { match self { Variable::Original(v) => Some(v.clone()), _ => None, } } } impl Display for Variable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Variable::Original(v) => write!(f, "{v}"), Variable::Boolean(i) => write!(f, "bool_{i}"), Variable::Linearized(i) => write!(f, "lin_{i}"), } } } /// A solver that transforms variables from one type to another, pub struct VarTransformation { solver: S, _phantom: std::marker::PhantomData<(T, V)>, } impl VarTransformation where T: FieldElement, V: Clone + Eq, S: Solver>, { pub fn new(solver: S) -> Self { Self { solver, _phantom: std::marker::PhantomData, } } } impl RangeConstraintProvider for VarTransformation where T: FieldElement, S: RangeConstraintProvider>, V: Clone, { fn get(&self, var: &V) -> RangeConstraint { self.solver.get(&Variable::from(var)) } } impl Display for VarTransformation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, 
"{}", self.solver) } } impl Solver for VarTransformation where T: FieldElement, V: Ord + Clone + Eq + Hash + Display, S: Solver>, { /// Solves the system and ignores all assignments that contain a new variable /// (either on the LHS or the RHS). fn solve(&mut self) -> Result>, Error> { let assignments = self.solver.solve()?; Ok(assignments .into_iter() .filter_map(|(v, expr)| { assert!(expr.is_affine()); let v = v.try_to_original()?; let expr = expr.try_transform_var_type(&mut |v| v.try_to_original())?; Some((v, expr)) }) .collect()) } fn add_algebraic_constraints( &mut self, constraints: impl IntoIterator>>, ) { self.solver .add_algebraic_constraints(constraints.into_iter().map(|c| transform_constraint(&c))); } fn add_bus_interactions( &mut self, bus_interactions: impl IntoIterator>>, ) { self.solver.add_bus_interactions( bus_interactions .into_iter() .map(|bus_interaction| bus_interaction.fields().map(transform_expr).collect()), ) } fn add_range_constraint(&mut self, variable: &V, constraint: RangeConstraint) { self.solver .add_range_constraint(&variable.into(), constraint); } fn retain_variables(&mut self, variables_to_keep: &HashSet) { // This will cause constraints to be deleted if they // only contain newly added variables. 
let variables_to_keep = variables_to_keep .iter() .map(From::from) .collect::>(); self.solver.retain_variables(&variables_to_keep); } fn range_constraint_for_expression( &self, expr: &GroupedExpression, ) -> RangeConstraint { self.solver .range_constraint_for_expression(&transform_expr(expr)) } fn try_to_equivalent_constant(&self, expr: &GroupedExpression) -> Option { self.solver .try_to_equivalent_constant(&transform_expr(expr)) } fn are_expressions_known_to_be_different( &mut self, a: &GroupedExpression, b: &GroupedExpression, ) -> bool { let a = transform_expr(a); let b = transform_expr(b); self.solver.are_expressions_known_to_be_different(&a, &b) } } fn transform_expr( expr: &GroupedExpression, ) -> GroupedExpression> { expr.transform_var_type(&mut |v| v.into()) } fn transform_constraint( constraint: &AlgebraicConstraint>, ) -> AlgebraicConstraint>> { AlgebraicConstraint::assert_zero(transform_expr(&constraint.expression)) } ================================================ FILE: constraint-solver/src/solver.rs ================================================ use powdr_number::FieldElement; use crate::constraint_system::{ AlgebraicConstraint, BusInteraction, BusInteractionHandler, ConstraintSystem, }; use crate::grouped_expression::GroupedExpression; use crate::range_constraint::RangeConstraint; use crate::solver::base::{BaseSolver, VarDispenserImpl}; use crate::solver::var_transformation::VarTransformation; use super::grouped_expression::RangeConstraintProvider; use crate::algebraic_constraint::solve::Error as AlgebraicSolverError; use std::collections::HashSet; use std::fmt::{Debug, Display}; use std::hash::Hash; mod base; mod boolean_extractor; mod constraint_splitter; mod exhaustive_search; mod linearizer; mod var_transformation; /// Solve a constraint system, i.e. derive assignments for variables in the system. 
pub fn solve_system( constraint_system: ConstraintSystem, bus_interaction_handler: impl BusInteractionHandler, ) -> Result>, Error> where T: FieldElement, V: Ord + Clone + Hash + Eq + Display, { new_solver(constraint_system, bus_interaction_handler).solve() } /// Creates a new solver for the given system and bus interaction handler. pub fn new_solver( constraint_system: ConstraintSystem, bus_interaction_handler: impl BusInteractionHandler, ) -> impl Solver where T: FieldElement, V: Ord + Clone + Hash + Eq + Display, { let mut solver = VarTransformation::new(BaseSolver::<_, _, _, VarDispenserImpl>::new( bus_interaction_handler, )); solver.add_algebraic_constraints(constraint_system.algebraic_constraints); solver.add_bus_interactions(constraint_system.bus_interactions); solver } pub trait Solver: RangeConstraintProvider + Sized { /// Solves the constraints as far as possible, returning concrete variable /// assignments. Does not return the same assignments again if called more than once. fn solve(&mut self) -> Result>, Error>; /// Adds a new algebraic constraint to the system. fn add_algebraic_constraints( &mut self, constraints: impl IntoIterator>>, ); /// Adds a new bus interaction to the system. fn add_bus_interactions( &mut self, bus_interactions: impl IntoIterator>>, ); /// Adds a new range constraint for the variable. fn add_range_constraint(&mut self, var: &V, constraint: RangeConstraint); /// Permits the solver to remove all variables except those in `variables_to_keep`. /// This should only keep the constraints that reference at least one of the variables. fn retain_variables(&mut self, variables_to_keep: &HashSet); /// Returns the best known range constraint for the given expression. fn range_constraint_for_expression(&self, expr: &GroupedExpression) -> RangeConstraint; /// If the solver can determine the given expression to always have a constant /// value, returns that value. Otherwise, returns `None`. 
/// Note that if this function returns `x` on input `e`, replacing `x` /// by `x` in a system does not always yield an equivalent system - it might /// be less strict. Replacing and afterwards adding `e = x` does yield an /// jequivalent system, though. fn try_to_equivalent_constant(&self, expr: &GroupedExpression) -> Option; /// Returns `true` if `a` and `b` are different for all satisfying assignments. /// In other words, `a - b` does not allow the value zero. /// If this function returns `false`, it does not mean that `a` and `b` are equal, /// i.e. a function always returning `false` here satisfies the trait. fn are_expressions_known_to_be_different( &mut self, a: &GroupedExpression, b: &GroupedExpression, ) -> bool; } /// An error occurred while solving the constraint system. /// This means that the constraint system is unsatisfiable. #[derive(Debug, PartialEq, Eq)] pub enum Error { /// An error occurred while calling `GroupedExpression::solve` AlgebraicSolverError(AlgebraicSolverError), /// The bus interaction handler reported that some sent data was invalid. BusInteractionError, /// During exhaustive search, we came across a combination of variables for which /// no assignment would satisfy all the constraints. ExhaustiveSearchError, } /// An assignment of a variable. 
pub type VariableAssignment = (V, GroupedExpression); ================================================ FILE: constraint-solver/src/symbolic_expression.rs ================================================ use auto_enums::auto_enum; use num_traits::{One, Zero}; use std::hash::Hash; use std::ops::Sub; use std::ops::{AddAssign, MulAssign}; use std::{ fmt::{self, Display, Formatter}, iter, ops::{Add, Mul, Neg}, sync::Arc, }; use powdr_number::{ExpressionConvertible, FieldElement}; use crate::runtime_constant::{RuntimeConstant, Substitutable, VarTransformable}; use super::range_constraint::RangeConstraint; /// A value that is known at run-time, defined through a complex expression /// involving known cells or variables and compile-time constants. /// Each of the sub-expressions can have its own range constraint. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum SymbolicExpression { /// A concrete constant value known at compile time. Concrete(T), /// A symbolic value known at run-time, referencing a cell, /// an input, a local variable or whatever it is used for. Symbol(S, RangeConstraint), BinaryOperation(Arc, BinaryOperator, Arc, RangeConstraint), UnaryOperation(UnaryOperator, Arc, RangeConstraint), } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum BinaryOperator { Add, Sub, Mul, /// Finite field division. Div, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum UnaryOperator { Neg, } impl SymbolicExpression { /// Returns all direct children of this expression. /// Does specifically not implement the `Children` trait, because it does not go /// well with recursive types. #[auto_enum(Iterator)] fn children(&self) -> impl Iterator> { match self { SymbolicExpression::BinaryOperation(lhs, _, rhs, _) => { [lhs.as_ref(), rhs.as_ref()].into_iter() } SymbolicExpression::UnaryOperation(_, expr, _) => iter::once(expr.as_ref()), SymbolicExpression::Concrete(_) | SymbolicExpression::Symbol(..) 
=> iter::empty(), } } /// Returns an iterator over all direct and indirect children of this expression, including /// the expression itself. pub fn all_children(&self) -> Box> + '_> { Box::new(iter::once(self).chain(self.children().flat_map(|e| e.all_children()))) } } impl SymbolicExpression { pub fn from_symbol(symbol: S, rc: RangeConstraint) -> Self { if let Some(v) = rc.try_to_single_value() { SymbolicExpression::Concrete(v) } else { SymbolicExpression::Symbol(symbol, rc) } } } impl SymbolicExpression { /// Applies a variable substitution and returns a modified version if there was a change. pub fn compute_substitution(&self, variable: &S, substitution: &Self) -> Option { match self { SymbolicExpression::Concrete(_) => None, SymbolicExpression::Symbol(v, _) => (v == variable).then(|| substitution.clone()), SymbolicExpression::BinaryOperation(left, op, right, _) => { let (l, r) = match ( left.compute_substitution(variable, substitution), right.compute_substitution(variable, substitution), ) { (None, None) => return None, (Some(l), None) => (l, (**right).clone()), (None, Some(r)) => ((**left).clone(), r), (Some(l), Some(r)) => (l, r), }; match op { BinaryOperator::Add => Some(l + r), BinaryOperator::Sub => Some(l - r), BinaryOperator::Mul => Some(l * r), BinaryOperator::Div => Some(l.field_div(&r)), } } SymbolicExpression::UnaryOperation(op, inner, _) => { let inner = inner.compute_substitution(variable, substitution)?; match op { UnaryOperator::Neg => Some(-inner), } } } } /// Applies a variable substitution in place. pub fn substitute(&mut self, variable: &S, substitution: &Self) { if let Some(updated) = self.compute_substitution(variable, substitution) { *self = updated; } } } impl ExpressionConvertible for SymbolicExpression { /// Turns a SymbolicExpression into an expression over its variables, essentially /// making all variables unknown variables. /// /// Fails in case a division operation is used. 
fn try_to_expression< E: Add + Sub + Mul + Neg, >( &self, number_converter: &impl Fn(&T) -> E, var_converter: &impl Fn(&V) -> E, try_to_number: &impl Fn(&E) -> Option, ) -> Option { Some(match self { SymbolicExpression::Concrete(value) => number_converter(value), SymbolicExpression::Symbol(var, _) => var_converter(var), SymbolicExpression::BinaryOperation(left, op, right, _) => { let left = left.try_to_expression(number_converter, var_converter, try_to_number)?; let right = right.try_to_expression(number_converter, var_converter, try_to_number)?; match op { BinaryOperator::Add => left + right, BinaryOperator::Sub => left - right, BinaryOperator::Mul => left * right, BinaryOperator::Div => { if let Some(right) = try_to_number(&right) { left * number_converter(&(T::from(1) / right)) } else { return None; } } } } SymbolicExpression::UnaryOperation(op, inner, _) => { let inner = inner.try_to_expression(number_converter, var_converter, try_to_number)?; match op { UnaryOperator::Neg => -inner, } } }) } } impl VarTransformable for SymbolicExpression { type Transformed = SymbolicExpression; fn try_transform_var_type( &self, var_transform: &mut impl FnMut(&S1) -> Option, ) -> Option> { Some(match self { SymbolicExpression::Concrete(n) => SymbolicExpression::Concrete(*n), SymbolicExpression::Symbol(v, rc) => { SymbolicExpression::from_symbol(var_transform(v)?, *rc) } SymbolicExpression::BinaryOperation(lhs, op, rhs, rc) => { SymbolicExpression::BinaryOperation( Arc::new(lhs.try_transform_var_type(var_transform)?), *op, Arc::new(rhs.try_transform_var_type(var_transform)?), *rc, ) } SymbolicExpression::UnaryOperation(op, inner, rc) => { SymbolicExpression::UnaryOperation( *op, Arc::new(inner.try_transform_var_type(var_transform)?), *rc, ) } }) } } /// Display for affine symbolic expressions, for informational purposes only. 
impl Display for SymbolicExpression { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { SymbolicExpression::Concrete(n) => { if n.is_in_lower_half() { write!(f, "{n}") } else { write!(f, "-{}", -*n) } } SymbolicExpression::Symbol(name, _) => write!(f, "{name}"), SymbolicExpression::BinaryOperation(lhs, op, rhs, _) => { write!(f, "({lhs} {op} {rhs})") } SymbolicExpression::UnaryOperation(op, expr, _) => write!(f, "{op}{expr}"), } } } impl Display for BinaryOperator { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { BinaryOperator::Add => write!(f, "+"), BinaryOperator::Sub => write!(f, "-"), BinaryOperator::Mul => write!(f, "*"), BinaryOperator::Div => write!(f, "/"), } } } impl Display for UnaryOperator { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { UnaryOperator::Neg => write!(f, "-"), } } } impl From for SymbolicExpression { fn from(n: T) -> Self { SymbolicExpression::Concrete(n) } } impl Add for &SymbolicExpression { type Output = SymbolicExpression; fn add(self, rhs: Self) -> Self::Output { if self.is_known_zero() { return rhs.clone(); } if rhs.is_known_zero() { return self.clone(); } match (self, rhs) { (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) => { SymbolicExpression::Concrete(*a + *b) } (SymbolicExpression::UnaryOperation(UnaryOperator::Neg, negated, _), other) | (other, SymbolicExpression::UnaryOperation(UnaryOperator::Neg, negated, _)) if negated.as_ref() == other => { T::from(0).into() } _ => SymbolicExpression::BinaryOperation( Arc::new(self.clone()), BinaryOperator::Add, Arc::new(rhs.clone()), self.range_constraint().combine_sum(&rhs.range_constraint()), ), } } } impl Add for SymbolicExpression { type Output = SymbolicExpression; fn add(self, rhs: Self) -> Self::Output { &self + &rhs } } impl AddAssign for SymbolicExpression { fn add_assign(&mut self, rhs: Self) { *self = self.clone() + rhs; } } impl Sub for &SymbolicExpression { type Output = SymbolicExpression; fn 
sub(self, rhs: Self) -> Self::Output { if self.is_known_zero() { return -rhs.clone(); } if rhs.is_known_zero() { return self.clone(); } match (self, rhs) { (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) => { SymbolicExpression::Concrete(*a - *b) } (a, b) if a == b => T::from(0).into(), _ => SymbolicExpression::BinaryOperation( Arc::new(self.clone()), BinaryOperator::Sub, Arc::new(rhs.clone()), self.range_constraint() .combine_sum(&rhs.range_constraint().neg()), ), } } } impl Sub for SymbolicExpression { type Output = SymbolicExpression; fn sub(self, rhs: Self) -> Self::Output { &self - &rhs } } impl Neg for &SymbolicExpression { type Output = SymbolicExpression; fn neg(self) -> Self::Output { match self { SymbolicExpression::Concrete(n) => SymbolicExpression::Concrete(-*n), SymbolicExpression::UnaryOperation(UnaryOperator::Neg, expr, _) => { expr.as_ref().clone() } SymbolicExpression::BinaryOperation(lhs, BinaryOperator::Add, rhs, _) => { -(**lhs).clone() + -(**rhs).clone() } SymbolicExpression::BinaryOperation(lhs, BinaryOperator::Sub, rhs, _) => { SymbolicExpression::BinaryOperation( rhs.clone(), BinaryOperator::Sub, lhs.clone(), self.range_constraint().multiple(-T::from(1)), ) } SymbolicExpression::BinaryOperation(lhs, BinaryOperator::Mul, rhs, _) if matches!(**lhs, SymbolicExpression::Concrete(_)) => { SymbolicExpression::BinaryOperation( Arc::new(-(**lhs).clone()), BinaryOperator::Mul, rhs.clone(), self.range_constraint().multiple(-T::from(1)), ) } SymbolicExpression::BinaryOperation(lhs, BinaryOperator::Mul, rhs, _) if matches!(**rhs, SymbolicExpression::Concrete(_)) => { SymbolicExpression::BinaryOperation( lhs.clone(), BinaryOperator::Mul, Arc::new(-(**rhs).clone()), self.range_constraint().multiple(-T::from(1)), ) } _ => SymbolicExpression::UnaryOperation( UnaryOperator::Neg, Arc::new(self.clone()), self.range_constraint().multiple(-T::from(1)), ), } } } impl Neg for SymbolicExpression { type Output = SymbolicExpression; fn neg(self) -> 
Self::Output { -&self } } impl Mul for &SymbolicExpression { type Output = SymbolicExpression; fn mul(self, rhs: Self) -> Self::Output { if let (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) = (self, rhs) { SymbolicExpression::Concrete(*a * *b) } else if self.is_known_zero() || rhs.is_known_zero() { SymbolicExpression::Concrete(T::from(0)) } else if self.is_known_one() { rhs.clone() } else if rhs.is_known_one() { self.clone() } else if self.is_known_minus_one() { -rhs } else if rhs.is_known_minus_one() { -self } else { SymbolicExpression::BinaryOperation( Arc::new(self.clone()), BinaryOperator::Mul, Arc::new(rhs.clone()), self.range_constraint() .combine_product(&rhs.range_constraint()), ) } } } impl Mul for SymbolicExpression { type Output = SymbolicExpression; fn mul(self, rhs: Self) -> Self { &self * &rhs } } impl MulAssign for SymbolicExpression { fn mul_assign(&mut self, rhs: Self) { *self = self.clone() * rhs; } } impl Zero for SymbolicExpression { fn zero() -> Self { SymbolicExpression::Concrete(T::from(0)) } fn is_zero(&self) -> bool { self.is_known_zero() } } impl One for SymbolicExpression { fn one() -> Self { SymbolicExpression::Concrete(T::from(1)) } fn is_one(&self) -> bool { self.is_known_one() } } impl RuntimeConstant for SymbolicExpression { type FieldType = T; fn try_to_number(&self) -> Option { match self { SymbolicExpression::Concrete(n) => Some(*n), SymbolicExpression::Symbol(..) | SymbolicExpression::BinaryOperation(..) | SymbolicExpression::UnaryOperation(..) => None, } } fn range_constraint(&self) -> RangeConstraint { match self { SymbolicExpression::Concrete(v) => RangeConstraint::from_value(*v), SymbolicExpression::Symbol(.., rc) | SymbolicExpression::BinaryOperation(.., rc) | SymbolicExpression::UnaryOperation(.., rc) => *rc, } } /// Field element division. /// If you use this, you must ensure that the divisor is not zero. 
fn field_div(&self, rhs: &Self) -> Self { if let (SymbolicExpression::Concrete(a), SymbolicExpression::Concrete(b)) = (self, rhs) { assert!(b != &T::from(0)); SymbolicExpression::Concrete(*a / *b) } else if self.is_known_zero() { SymbolicExpression::Concrete(T::from(0)) } else if rhs.is_known_one() { self.clone() } else if rhs.is_known_minus_one() { -self } else { // TODO other simplifications like `-x / -y => x / y`, `-x / concrete => x / -concrete`, etc. SymbolicExpression::BinaryOperation( Arc::new(self.clone()), BinaryOperator::Div, Arc::new(rhs.clone()), Default::default(), ) } } /// Returns the multiplicative inverse in the field. fn field_inverse(&self) -> Self { if let SymbolicExpression::Concrete(x) = self { assert!(x != &T::from(0)); SymbolicExpression::Concrete(T::from(1) / *x) } else if let SymbolicExpression::BinaryOperation(x, BinaryOperator::Div, y, _) = self { SymbolicExpression::BinaryOperation( y.clone(), BinaryOperator::Div, x.clone(), Default::default(), ) } else { SymbolicExpression::BinaryOperation( Arc::new(Self::from(T::from(1))), BinaryOperator::Div, Arc::new(self.clone()), Default::default(), ) } } fn from_u64(k: u64) -> Self { SymbolicExpression::Concrete(T::from(k)) } } impl Substitutable for SymbolicExpression { fn substitute(&mut self, variable: &V, substitution: &Self) { SymbolicExpression::substitute(self, variable, substitution); } } ================================================ FILE: constraint-solver/src/system_splitter.rs ================================================ use std::hash::Hash; use std::{collections::BTreeSet, fmt::Display}; use crate::constraint_system::{AlgebraicConstraint, ConstraintRef}; use crate::reachability::reachable_variables; use crate::{ constraint_system::ConstraintSystem, indexed_constraint_system::IndexedConstraintSystem, runtime_constant::RuntimeConstant, }; /// Splits the constraint system into independent subsets. 
/// Each variable occurs in exactly one subset and all constraints referencing a /// certain variable have to be in the same subsystem. /// Note that the list of derived variables in the returned set is empty, /// but derived variables do occur in the constraints. pub fn split_system( constraint_system: IndexedConstraintSystem, ) -> Vec> { // We cleanup and re-index the constraint system, otherwise we get too many // empty systems due to variables that have already been substituted. let mut constraint_system: ConstraintSystem = constraint_system.into(); constraint_system .algebraic_constraints .retain(|constr| !constr.is_redundant()); let constraint_system: IndexedConstraintSystem = constraint_system.into(); let mut systems = Vec::new(); let mut remaining_variables: BTreeSet<_> = constraint_system.variables().cloned().collect(); while let Some(v) = remaining_variables.pop_first() { let variables_to_extract = reachable_variables([v.clone()], &constraint_system); let mut algebraic_constraints = Vec::new(); let mut bus_interactions = Vec::new(); for constr in constraint_system.constraints_referencing_variables(&variables_to_extract) { match constr { ConstraintRef::AlgebraicConstraint(algebraic_constraint) => algebraic_constraints .push(AlgebraicConstraint::assert_zero( algebraic_constraint.expression.clone(), )), ConstraintRef::BusInteraction(bus_interaction) => { bus_interactions.push(bus_interaction.clone()) } } } systems.push(ConstraintSystem { algebraic_constraints, bus_interactions, derived_variables: Vec::new(), }); // Fine to iterate over a hash set here since the order in which we remove // is not relevant. 
#[allow(clippy::iter_over_hash_type)] for v in variables_to_extract { remaining_variables.remove(&v); } } systems } ================================================ FILE: constraint-solver/src/test_utils.rs ================================================ use powdr_number::GoldilocksField; use crate::{ constraint_system::{AlgebraicConstraint, BusInteraction, ConstraintSystem}, grouped_expression::GroupedExpression, runtime_constant::RuntimeConstant, symbolic_expression::SymbolicExpression, }; pub type Var = &'static str; pub type Qse = GroupedExpression, Var>; pub fn var(name: Var) -> Qse { Qse::from_unknown_variable(name) } pub fn constant(value: u64) -> Qse { Qse::from_number(GoldilocksField::from(value)) } impl ConstraintSystem { pub fn with_constraints( mut self, constraints: Vec>>>, ) -> Self { self.algebraic_constraints .extend(constraints.into_iter().map(Into::into)); self } pub fn with_bus_interactions( mut self, bus_interactions: Vec>>>, ) -> Self { self.bus_interactions .extend(bus_interactions.into_iter().map(Into::into)); self } } ================================================ FILE: constraint-solver/src/utils.rs ================================================ use std::collections::BTreeMap; use std::hash::Hash; use itertools::Itertools; use powdr_number::{FieldElement, LargeInt}; use crate::grouped_expression::{GroupedExpression, RangeConstraintProvider}; use crate::runtime_constant::{RuntimeConstant, Substitutable}; /// Returns the number of possible assignments for the variables given the range constraints. /// Returns `None` if this number would not fit a `u64`. 
pub fn has_few_possible_assignments( variables: impl Iterator, rc: &impl RangeConstraintProvider, threshold: u64, ) -> bool { variables .map(|v| rc.get(&v)) .map(|rc| rc.size_estimate().try_into_u64()) .try_fold(1u64, |acc, x| acc.checked_mul(x?)) .is_some_and(|count| count <= threshold) } /// Returns all possible assignments for the given variables that satisfy their /// range constraints. /// /// Note that it should be verified that the returned sequence is /// "small" before calling this function, for example using /// the function `has_few_possible_assignments`. pub fn get_all_possible_assignments( variables: impl IntoIterator, rc: &impl RangeConstraintProvider, ) -> impl Iterator> { variables .into_iter() .map(|v| { rc.get(&v) .allowed_values() .collect_vec() .into_iter() .map(move |value| (v.clone(), value)) }) .multi_cartesian_product() .map(|assignment| assignment.into_iter().collect::>()) } /// Returns all possible concrete values for `expr` using exhaustive search. /// Returns None if the number of possible assignments exceeds `max_elements`. pub fn possible_concrete_values< 'a, T: RuntimeConstant + Substitutable + Clone, V: Clone + Ord + Hash, >( expr: &'a GroupedExpression, rc: &'a impl RangeConstraintProvider, max_elements: u64, ) -> Option + 'a> { let variables = expr.referenced_unknown_variables().cloned().collect_vec(); if has_few_possible_assignments(variables.iter().cloned(), rc, max_elements) { Some( get_all_possible_assignments(variables, rc).map(|assignment| { let mut expr = expr.clone(); for (variable, value) in assignment.iter() { expr.substitute_by_known(variable, &T::from(*value)); } // We substitute all variables, so this has to be a runtime constant. expr.try_to_known().unwrap().clone() }), ) } else { // If there are too many possible assignments, we do not try to perform exhaustive search. 
None } } ================================================ FILE: constraint-solver/src/variable_update.rs ================================================ use powdr_number::FieldElement; use super::range_constraint::RangeConstraint; /// An update representing new information about a variable. #[derive(Debug, Clone)] pub struct VariableUpdate { pub variable: V, pub update: UpdateKind, } #[derive(Debug, Clone)] pub enum UpdateKind { /// We have updated range constraints for the variable. RangeConstraintUpdate(RangeConstraint), /// The variable is to be replaced by a different expression. Replace(R), } ================================================ FILE: constraint-solver/tests/solver.rs ================================================ use std::collections::BTreeMap; use num_traits::identities::{One, Zero}; use powdr_constraint_solver::{ bus_interaction_handler::DefaultBusInteractionHandler, constraint_system::{BusInteraction, BusInteractionHandler, ConstraintSystem}, grouped_expression::GroupedExpression, range_constraint::RangeConstraint, solver::{solve_system, Error}, }; use powdr_number::{FieldElement, GoldilocksField, LargeInt}; use test_log::test; use pretty_assertions::assert_eq; type Var = &'static str; fn var(name: Var) -> GroupedExpression { GroupedExpression::from_unknown_variable(name) } fn constant(value: u64) -> GroupedExpression { GroupedExpression::from_number(GoldilocksField::from(value)) } pub fn assert_solve_result>( system: ConstraintSystem, bus_interaction_handler: B, expected_assignments: Vec<(Var, GoldilocksField)>, ) { let final_state = solve_system(system, bus_interaction_handler).unwrap(); let expected_final_state = expected_assignments.into_iter().collect(); assert_expected_state(final_state, expected_final_state); } fn assert_expected_state( final_state: impl IntoIterator)>, expected_final_state: BTreeMap, ) { let final_state = final_state.into_iter().collect::>(); assert_eq!( final_state.keys().collect::>(), 
expected_final_state.keys().collect::>(), "Different set of variables" ); let mut error = false; for (variable, value) in expected_final_state { // Compare string representation, so that range constraints are ignored. if final_state[variable].to_string() != value.to_string() { log::error!("Mismatch for variable {variable}:"); log::error!(" Expected: {value}"); log::error!(" Actual: {}", final_state[variable]); error = true; } } assert!(!error, "Final state does not match expected state"); } #[test] fn single_variable() { assert_solve_result( ConstraintSystem::default().with_constraints(vec![var("x") - constant(5)]), DefaultBusInteractionHandler::default(), vec![("x", 5.into())], ); } #[test] fn concretely_solvable() { let constraint_system = ConstraintSystem::default().with_constraints(vec![ var("a") - constant(2), var("b") - constant(3), // c = a * b = 6 var("c") - var("a") * var("b"), // d = c * 4 - a = 22 var("d") - (var("c") * constant(4) - var("a")), ]); assert_solve_result( constraint_system, DefaultBusInteractionHandler::default(), vec![ ("a", 2.into()), ("b", 3.into()), ("c", 6.into()), ("d", 22.into()), ], ); } #[test] fn bit_decomposition() { let constraint_system = ConstraintSystem::default().with_constraints(vec![ // 4 bit-constrained variables: var("b0") * (var("b0") - constant(1)), var("b1") * (var("b1") - constant(1)), var("b2") * (var("b2") - constant(1)), var("b3") * (var("b3") - constant(1)), // Bit-decomposition of a concrete value: var("b0") + var("b1") * constant(2) + var("b2") * constant(4) + var("b3") * constant(8) - constant(0b1110), ]); assert_solve_result( constraint_system, DefaultBusInteractionHandler::default(), vec![ ("b0", 0.into()), ("b1", 1.into()), ("b2", 1.into()), ("b3", 1.into()), ], ); } const BYTE_BUS_ID: u64 = 42; const XOR_BUS_ID: u64 = 43; struct TestBusInteractionHandler; impl BusInteractionHandler for TestBusInteractionHandler { fn handle_bus_interaction( &self, bus_interaction: BusInteraction>, ) -> BusInteraction> { let 
(Some(bus_id), Some(multiplicity)) = ( bus_interaction.bus_id.try_to_single_value(), bus_interaction.multiplicity.try_to_single_value(), ) else { return bus_interaction; }; if multiplicity.is_zero() { return bus_interaction; } assert!(multiplicity.is_one(), "Only expected send interactions"); let byte_constraint = RangeConstraint::from_mask(0xffu32); let payload_constraints = match bus_id.to_integer().try_into_u64().unwrap() { BYTE_BUS_ID => { assert_eq!(bus_interaction.payload.len(), 1); vec![byte_constraint] } XOR_BUS_ID => { assert_eq!(bus_interaction.payload.len(), 3); if let (Some(a), Some(b)) = ( bus_interaction.payload[0].try_to_single_value(), bus_interaction.payload[1].try_to_single_value(), ) { // Both inputs are known, can compute result concretely let result = GoldilocksField::from( a.to_integer().try_into_u64().unwrap() ^ b.to_integer().try_into_u64().unwrap(), ); vec![ bus_interaction.payload[0], bus_interaction.payload[1], RangeConstraint::from_value(result), ] } else { vec![byte_constraint; 3] } } _ => { panic!("Unexpected bus ID: {bus_id}"); } }; BusInteraction { payload: payload_constraints, ..bus_interaction } } } fn send( bus_id: u64, payload: Vec>, ) -> BusInteraction> { BusInteraction { multiplicity: constant(1), bus_id: constant(bus_id), payload, } } #[test] fn byte_decomposition() { let constraint_system = ConstraintSystem::default() .with_constraints(vec![ // Byte-decomposition of a concrete value: var("b0") + var("b1") * constant(1 << 8) + var("b2") * constant(1 << 16) + var("b3") * constant(1 << 24) - constant(0xabcdef12), ]) .with_bus_interactions( // Byte range constraints on b0..3 (0..4) .map(|i| send(BYTE_BUS_ID, vec![var(format!("b{i}").leak())])) .collect(), ); assert_solve_result( constraint_system, TestBusInteractionHandler, vec![ ("b0", 0x12.into()), ("b1", 0xef.into()), ("b2", 0xcd.into()), ("b3", 0xab.into()), ], ); } #[test] fn xor() { let constraint_system = ConstraintSystem::default() .with_constraints(vec![ // a and b are 
the byte decomposition of 0xa00b // Note that solving this requires range constraints on a and b constant(1 << 8) * var("a") + var("b") - constant(0xa00b), ]) // Send (a, b, c) to the XOR table. // Initially, this should return the required range constraints for a and b. // Once a and b are known concretely, c can be computed concretely as well. .with_bus_interactions(vec![send(XOR_BUS_ID, vec![var("a"), var("b"), var("c")])]); assert_solve_result( constraint_system, TestBusInteractionHandler, vec![("a", 0xa0.into()), ("b", 0x0b.into()), ("c", 0xab.into())], ); } #[test] fn xor_invalid() { let constraint_system = ConstraintSystem::default() .with_constraints(vec![ var("a") - constant(0xa0), var("b") - constant(0x0b), var("c") - constant(0xff), ]) .with_bus_interactions(vec![send(XOR_BUS_ID, vec![var("a"), var("b"), var("c")])]); match solve_system(constraint_system, TestBusInteractionHandler) { Err(e) => assert_eq!(e, Error::BusInteractionError), _ => panic!("Expected error!"), } } #[test] fn one_hot_flags() { let constraint_system = ConstraintSystem::default().with_constraints(vec![ // Boolean flags var("flag0") * (var("flag0") - constant(1)), var("flag1") * (var("flag1") - constant(1)), var("flag2") * (var("flag2") - constant(1)), var("flag3") * (var("flag3") - constant(1)), // Exactly one flag is active var("flag0") + var("flag1") + var("flag2") + var("flag3") - constant(1), // Flag 2 is active var("flag0") * constant(0) + var("flag1") * constant(1) + var("flag2") * constant(2) + var("flag3") * constant(3) - constant(2), ]); // This can be solved via backtracking: There are 16 possible assignments // for the 4 flags, but only 1 of them satisfies all the constraints. 
assert_solve_result( constraint_system, DefaultBusInteractionHandler::default(), vec![ ("flag0", 0.into()), ("flag1", 0.into()), ("flag2", 1.into()), ("flag3", 0.into()), ], ); } #[test] fn binary_flags() { let bit_to_expression = |bit, var| match bit { true => var, false => constant(1) - var, }; let index_to_expression = |i: usize| -> GroupedExpression { (0..3) .map(move |j| bit_to_expression(i & (1 << j) != 0, var(format!("flag{j}").leak()))) .fold(constant(1), |acc, x| acc * x) }; let constraint_system = ConstraintSystem::default().with_constraints(vec![ // Boolean flags var("flag0") * (var("flag0") - constant(1)), var("flag1") * (var("flag1") - constant(1)), var("flag2") * (var("flag2") - constant(1)), index_to_expression(0b000) * constant(101) + index_to_expression(0b001) * constant(102) + index_to_expression(0b010) * constant(103) + index_to_expression(0b011) * constant(104) + index_to_expression(0b100) * constant(105) + index_to_expression(0b101) * constant(106) + index_to_expression(0b110) * constant(107) + index_to_expression(0b111) * constant(108) - constant(104), ]); assert_solve_result( constraint_system, DefaultBusInteractionHandler::default(), vec![ ("flag0", 1.into()), ("flag1", 1.into()), ("flag2", 0.into()), ], ); } #[test] fn ternary_flags() { // Implementing this logic in the OpenVM load/store chip: // https://github.com/openvm-org/openvm/blob/v1.2.0/extensions/rv32im/circuit/src/loadstore/core.rs#L110-L139 let two_inv = GroupedExpression::from_number(GoldilocksField::one() / GoldilocksField::from(2)); let neg_one = GroupedExpression::from_number(-GoldilocksField::one()); let sum = var("flag0") + var("flag1") + var("flag2") + var("flag3"); // The flags must be 0, 1, or 2, and their sum must be 1 or 2. // Given these constraints, there are 14 possible assignments. 
The following // expressions evaluate to 1 for exactly one of them, and otherwise to 0: let cases = vec![ // (2, 0, 0, 0), (0, 2, 0, 0), (0, 0, 2, 0), (0, 0, 0, 2) var("flag0") * (var("flag0") - constant(1)) * two_inv.clone(), var("flag1") * (var("flag1") - constant(1)) * two_inv.clone(), var("flag2") * (var("flag2") - constant(1)) * two_inv.clone(), var("flag3") * (var("flag3") - constant(1)) * two_inv.clone(), // (1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1) var("flag0") * (sum.clone() - constant(2)) * neg_one.clone(), var("flag1") * (sum.clone() - constant(2)) * neg_one.clone(), var("flag2") * (sum.clone() - constant(2)) * neg_one.clone(), var("flag3") * (sum.clone() - constant(2)) * neg_one.clone(), // (1, 1, 0, 0), (1, 0, 1, 0), (1, 0, 0, 1), (0, 1, 1, 0), (0, 1, 0, 1), (0, 0, 1, 1) var("flag0") * var("flag1"), var("flag0") * var("flag2"), var("flag0") * var("flag3"), var("flag1") * var("flag2"), var("flag1") * var("flag3"), var("flag2") * var("flag3"), ]; let constraint_system = ConstraintSystem::default().with_constraints(vec![ // All flags are either 0, 1, or 2. var("flag0") * (var("flag0") - constant(1)) * (var("flag0") - constant(2)), var("flag1") * (var("flag1") - constant(1)) * (var("flag1") - constant(2)), var("flag2") * (var("flag2") - constant(1)) * (var("flag2") - constant(2)), var("flag3") * (var("flag3") - constant(1)) * (var("flag3") - constant(2)), // The sum of flags is either 1 or 2. (sum.clone() - constant(1)) * (sum.clone() - constant(2)), // Of the expressions in `cases`, exactly one must evaluate to 1. // From this constraint, it can be derived that it must be one of case 3, 4, 5, or 6. 
cases[0].clone() * constant(1) + (cases[1].clone() + cases[2].clone()) * constant(2) + (cases[3].clone() + cases[4].clone() + cases[5].clone() + cases[6].clone()) * constant(3) + cases[7].clone() * constant(4) + (cases[8].clone() + cases[9].clone()) * constant(5) + (cases[10].clone() + cases[11].clone() + cases[12].clone() + cases[13].clone()) * constant(6) - constant(3), // We don't know which case is active, but for any of the cases that it could be, // is_load would be 1, so we should be able to solve for it. var("is_load") - (cases[0].clone() + cases[1].clone() + cases[2].clone() + cases[3].clone() + cases[4].clone() + cases[5].clone() + cases[6].clone()), ]); assert_solve_result( constraint_system, DefaultBusInteractionHandler::default(), vec![("is_load", 1.into())], ); } #[test] fn bit_decomposition_bug() { let algebraic_constraints = vec![ var("cmp_result_0") * (var("cmp_result_0") - constant(1)), var("imm_0") - constant(8), var("cmp_result_0") * var("imm_0") - constant(4) * var("cmp_result_0") - var("BusInteractionField(10, 2)") + constant(4), (var("BusInteractionField(10, 2)") - constant(4)) * (var("BusInteractionField(10, 2)") - constant(8)), ]; let constraint_system = ConstraintSystem::default().with_constraints(algebraic_constraints); // The solver used to infer more assignments due to a bug // in the bit decomposition logic. 
assert_solve_result( constraint_system, DefaultBusInteractionHandler::default(), vec![("imm_0", 8.into())], ); } ================================================ FILE: expression/Cargo.toml ================================================ [package] name = "powdr-expression" description = "powdr expression type" version = { workspace = true } edition = { workspace = true } license = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [dependencies] powdr-number.workspace = true num-traits.workspace = true derive_more.workspace = true serde = { version = "1.0", default-features = false, features = ["alloc", "derive", "rc"] } schemars = { version = "0.8.16", features = ["preserve_order"]} [dev-dependencies] test-log.workspace = true pretty_assertions.workspace = true serde_json.workspace = true [lints] workspace = true [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: expression/src/display.rs ================================================ use std::fmt::{self, Display, Formatter}; use crate::{ AlgebraicBinaryOperation, AlgebraicBinaryOperator, AlgebraicExpression, AlgebraicUnaryOperation, AlgebraicUnaryOperator, }; type ExpressionPrecedence = u64; trait Precedence { fn precedence(&self) -> Option; } impl Precedence for AlgebraicUnaryOperator { fn precedence(&self) -> Option { Some(match self { AlgebraicUnaryOperator::Minus => 1, }) } } impl Precedence for AlgebraicBinaryOperator { fn precedence(&self) -> Option { Some(match self { Self::Mul => 3, Self::Add | Self::Sub => 4, }) } } impl Precedence for AlgebraicExpression { fn precedence(&self) -> Option { match self { AlgebraicExpression::UnaryOperation(operation) => operation.op.precedence(), AlgebraicExpression::BinaryOperation(operation) => operation.op.precedence(), AlgebraicExpression::Number(..) | AlgebraicExpression::Reference(..) 
=> None, } } } impl Display for AlgebraicBinaryOperation { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let op_precedence = self.op.precedence().unwrap(); let use_left_parentheses = match self.left.precedence() { Some(left_precedence) => left_precedence > op_precedence, None => false, }; let use_right_parentheses = match self.right.precedence() { Some(right_precedence) => right_precedence >= op_precedence, None => false, }; let left_string = if use_left_parentheses { format!("({})", self.left) } else { format!("{}", self.left) }; let right_string = if use_right_parentheses { format!("({})", self.right) } else { format!("{}", self.right) }; write!(f, "{left_string} {} {right_string}", self.op) } } impl Display for AlgebraicUnaryOperation { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let exp_string = match (self.op.precedence(), self.expr.precedence()) { (Some(precedence), Some(inner_precedence)) if precedence < inner_precedence => { format!("({})", self.expr) } _ => { format!("{}", self.expr) } }; write!(f, "{}{exp_string}", self.op) } } impl Display for AlgebraicUnaryOperator { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { AlgebraicUnaryOperator::Minus => write!(f, "-"), } } } impl Display for AlgebraicBinaryOperator { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { AlgebraicBinaryOperator::Add => write!(f, "+"), AlgebraicBinaryOperator::Sub => write!(f, "-"), AlgebraicBinaryOperator::Mul => write!(f, "*"), } } } #[cfg(test)] mod test { use powdr_number::GoldilocksField; use pretty_assertions::assert_eq; use test_log::test; use super::AlgebraicExpression; fn test_display(expr: AlgebraicExpression, expected: &str) { assert_eq!(expr.to_string(), expected); } #[test] fn binary_op() { let x = AlgebraicExpression::Reference("x"); let y = AlgebraicExpression::Reference("y"); let z = AlgebraicExpression::Reference("z"); // Don't add extra test_display(x.clone() + y.clone() + z.clone(), "x + y + z"); 
test_display(x.clone() * y.clone() * z.clone(), "x * y * z"); // Remove unneeded test_display(-x.clone() + y.clone() * z.clone(), "-x + y * z"); test_display((x.clone() * y.clone()) * z.clone(), "x * y * z"); test_display(x.clone() - (y.clone() + z.clone()), "x - (y + z)"); test_display((x.clone() * y.clone()) + z.clone(), "x * y + z"); // Observe associativity test_display(x.clone() * (y.clone() * z.clone()), "x * (y * z)"); test_display(x.clone() + (y.clone() + z.clone()), "x + (y + z)"); // Don't remove needed test_display((x.clone() + y.clone()) * z.clone(), "(x + y) * z"); test_display(-(x.clone() + y.clone()), "-(x + y)"); } } ================================================ FILE: expression/src/lib.rs ================================================ use std::{ iter, ops::{self, Add, Mul, Neg, Sub}, }; use powdr_number::ExpressionConvertible; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; pub mod display; pub mod visitors; #[derive( Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize, JsonSchema, Hash, derive_more::Display, )] pub enum AlgebraicExpression { #[serde(untagged)] Reference(R), #[serde(untagged)] Number(T), #[serde(untagged, serialize_with = "serialize_binary_operation")] BinaryOperation(AlgebraicBinaryOperation), #[serde(untagged, serialize_with = "serialize_unary_operation")] UnaryOperation(AlgebraicUnaryOperation), } #[derive( Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize, JsonSchema, Hash, )] pub struct AlgebraicBinaryOperation { pub left: Box>, pub op: AlgebraicBinaryOperator, pub right: Box>, } #[derive( Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Serialize, Deserialize, JsonSchema, Hash, )] pub enum AlgebraicBinaryOperator { #[serde(rename = "+")] Add, #[serde(rename = "-")] Sub, #[serde(rename = "*")] Mul, } #[derive( Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize, JsonSchema, Hash, )] pub struct AlgebraicUnaryOperation { pub op: AlgebraicUnaryOperator, 
pub expr: Box>, } #[derive( Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Serialize, Deserialize, JsonSchema, Hash, )] pub enum AlgebraicUnaryOperator { #[serde(rename = "-")] Minus, } impl AlgebraicExpression { /// Returns an iterator over all (top-level) expressions in this expression. /// This specifically does not implement the Children trait because otherwise it /// would have a wrong implementation of ExpressionVisitable (which is implemented /// generically for all types that implement Children). fn children(&self) -> Box> + '_> { match self { AlgebraicExpression::Reference(_) | AlgebraicExpression::Number(_) => { Box::new(iter::empty()) } AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { left, right, .. }) => Box::new([left.as_ref(), right.as_ref()].into_iter()), AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { expr: e, .. }) => { Box::new([e.as_ref()].into_iter()) } } } /// Returns an iterator over all (top-level) expressions in this expression. /// This specifically does not implement the Children trait because otherwise it /// would have a wrong implementation of ExpressionVisitable (which is implemented /// generically for all types that implement Children). fn children_mut(&mut self) -> Box> + '_> { match self { AlgebraicExpression::Reference(_) | AlgebraicExpression::Number(_) => { Box::new(iter::empty()) } AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { left, right, .. }) => Box::new([left.as_mut(), right.as_mut()].into_iter()), AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { expr: e, .. }) => { Box::new([e.as_mut()].into_iter()) } } } /// Returns the degree of the expressions pub fn degree(&self) -> usize { match self { AlgebraicExpression::Reference(..) 
=> 1, // Multiplying two expressions adds their degrees AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { op: AlgebraicBinaryOperator::Mul, left, right, }) => left.degree() + right.degree(), // In all other cases, we take the maximum of the degrees of the children _ => self.children().map(|e| e.degree()).max().unwrap_or(0), } } pub fn new_binary(left: Self, op: AlgebraicBinaryOperator, right: Self) -> Self { AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { left: Box::new(left), op, right: Box::new(right), }) } pub fn new_unary(op: AlgebraicUnaryOperator, expr: Self) -> Self { AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { op, expr: Box::new(expr), }) } } impl ops::Add for AlgebraicExpression { type Output = Self; fn add(self, rhs: Self) -> Self::Output { Self::new_binary(self, AlgebraicBinaryOperator::Add, rhs) } } impl ops::Sub for AlgebraicExpression { type Output = Self; fn sub(self, rhs: Self) -> Self::Output { Self::new_binary(self, AlgebraicBinaryOperator::Sub, rhs) } } impl ops::Neg for AlgebraicExpression { type Output = Self; fn neg(self) -> Self::Output { Self::new_unary(AlgebraicUnaryOperator::Minus, self) } } impl ops::Mul for AlgebraicExpression { type Output = Self; fn mul(self, rhs: Self) -> Self::Output { Self::new_binary(self, AlgebraicBinaryOperator::Mul, rhs) } } impl From for AlgebraicExpression { fn from(value: T) -> Self { AlgebraicExpression::Number(value) } } impl ExpressionConvertible for AlgebraicExpression { fn to_expression< E: Add + Sub + Mul + Neg, >( &self, number_converter: &impl Fn(&T) -> E, var_converter: &impl Fn(&R) -> E, ) -> E { match self { AlgebraicExpression::Reference(r) => var_converter(r), AlgebraicExpression::Number(n) => number_converter(n), AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { left, op, right }) => { let left = left.to_expression(number_converter, var_converter); let right = right.to_expression(number_converter, var_converter); match op { 
AlgebraicBinaryOperator::Add => left + right, AlgebraicBinaryOperator::Sub => left - right, AlgebraicBinaryOperator::Mul => left * right, } } AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { op, expr }) => match op { AlgebraicUnaryOperator::Minus => { -expr.to_expression(number_converter, var_converter) } }, } } } fn serialize_unary_operation( un_op: &AlgebraicUnaryOperation, serializer: S, ) -> Result where S: serde::Serializer, T: Serialize, R: Serialize, { (&un_op.op, un_op.expr.as_ref()).serialize(serializer) } fn serialize_binary_operation( bin_op: &AlgebraicBinaryOperation, serializer: S, ) -> Result where S: serde::Serializer, T: Serialize, R: Serialize, { (bin_op.left.as_ref(), &bin_op.op, bin_op.right.as_ref()).serialize(serializer) } #[cfg(test)] mod tests { use super::*; #[test] fn test_serde() { let x: AlgebraicExpression = AlgebraicExpression::from(5) * AlgebraicExpression::Reference("x") - AlgebraicExpression::from(3); let serialized = serde_json::to_string(&x).unwrap(); assert_eq!(serialized, r#"[[5,"*","x"],"-",3]"#); let deserialized = serde_json::from_str(&serialized).unwrap(); assert_eq!(x, deserialized); } } ================================================ FILE: expression/src/visitors.rs ================================================ use std::{iter, ops::ControlFlow}; use crate::AlgebraicExpression; /// Generic trait that allows to iterate over sub-structures. /// /// It is only meant to iterate non-recursively over the direct children. /// Self and O do not have to be the same type and we can also have /// Children and Children implemented for the same type, /// if the goal is to iterate over sub-structures of different kinds. pub trait Children { /// Returns an iterator over all direct children of kind O in this object. fn children(&self) -> Box + '_>; /// Returns an iterator over all direct children of kind Q in this object. 
fn children_mut(&mut self) -> Box + '_>; } pub trait AllChildren { /// Returns an iterator over all direct and indirect children of kind `O` in this object. /// If `O` and `Self` are the same type, also includes `self`. /// Pre-order visitor. fn all_children(&self) -> Box + '_>; } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum VisitOrder { Pre, Post, } /// A trait to be implemented by an AST node. /// /// The idea is that it calls a callback function on each of the sub-nodes /// that are expressions. /// The difference to the Children trait is that ExpressionVisitable /// visits recursively. /// If a node implements Children, it also implements ExpressionVisitable. pub trait ExpressionVisitable { /// Traverses the AST and calls `f` on each Expression in pre-order. fn pre_visit_expressions_mut(&mut self, f: &mut F) where F: FnMut(&mut Expr), { let _ = self.visit_expressions_mut( &mut move |e| { f(e); ControlFlow::Continue::<()>(()) }, VisitOrder::Pre, ); } /// Traverses the AST and calls `f` on each Expression in post-order. 
fn post_visit_expressions_mut(&mut self, f: &mut F) where F: FnMut(&mut Expr), { let _ = self.visit_expressions_mut( &mut move |e| { f(e); ControlFlow::Continue::<()>(()) }, VisitOrder::Post, ); } fn visit_expressions(&self, f: &mut F, order: VisitOrder) -> ControlFlow where F: FnMut(&Expr) -> ControlFlow; fn visit_expressions_mut(&mut self, f: &mut F, order: VisitOrder) -> ControlFlow where F: FnMut(&mut Expr) -> ControlFlow; } impl, C: Children> ExpressionVisitable for C { fn visit_expressions_mut(&mut self, f: &mut F, o: VisitOrder) -> ControlFlow where F: FnMut(&mut Expr) -> ControlFlow, { self.children_mut() .try_for_each(|child| child.visit_expressions_mut(f, o)) } fn visit_expressions(&self, f: &mut F, o: VisitOrder) -> ControlFlow where F: FnMut(&Expr) -> ControlFlow, { self.children() .try_for_each(|child| child.visit_expressions(f, o)) } } impl, C: Children> AllChildren for C { fn all_children(&self) -> Box + '_> { Box::new(self.children().flat_map(|e| e.all_children())) } } impl ExpressionVisitable> for AlgebraicExpression { fn visit_expressions_mut(&mut self, f: &mut F, o: VisitOrder) -> ControlFlow where F: FnMut(&mut AlgebraicExpression) -> ControlFlow, { if o == VisitOrder::Pre { f(self)?; } self.children_mut() .try_for_each(|e| e.visit_expressions_mut(f, o))?; if o == VisitOrder::Post { f(self)?; } ControlFlow::Continue(()) } fn visit_expressions(&self, f: &mut F, o: VisitOrder) -> ControlFlow where F: FnMut(&AlgebraicExpression) -> ControlFlow, { if o == VisitOrder::Pre { f(self)?; } self.children() .try_for_each(|e| e.visit_expressions(f, o))?; if o == VisitOrder::Post { f(self)?; } ControlFlow::Continue(()) } } impl AllChildren> for AlgebraicExpression { fn all_children(&self) -> Box> + '_> { Box::new(iter::once(self).chain(self.children().flat_map(|e| e.all_children()))) } } ================================================ FILE: isa-utils/Cargo.toml ================================================ [package] name = "powdr-isa-utils" description = 
"powdr utilities for translating from native ISA code (RISCV for now)"
version = { workspace = true }
edition = { workspace = true }
license = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }

[lib]
bench = false # See https://github.com/bheisler/criterion.rs/issues/458

================================================
FILE: isa-utils/src/lib.rs
================================================
/// A single 32-bit data value.
pub enum SingleDataValue {
    /// A literal value.
    Value(u32),
    /// The value of a pointer to a text label. Since there may not be a
    /// 1-to-1 correspondence between native ISAs and Powdr ASM instructions,
    /// this is passed unresolved to the code generator.
    LabelReference(String),
    /// Currently not supported.
    Offset(String, String),
}

/// Wraps `s` in double quotes, escaping backslashes and double quotes so the
/// result reads back as the original string.
pub fn quote(s: &str) -> String {
    // TODO more things to quote
    format!("\"{}\"", s.replace('\\', "\\\\").replace('\"', "\\\""))
}

/// Rewrites `l` so that characters that are not valid in a label are replaced
/// by descriptive `_..._` tokens.
///
/// All single-character patterns use `char` literals (idiomatic and cheaper
/// than single-char `&str` patterns; see clippy's `single_char_pattern`).
pub fn escape_label(l: &str) -> String {
    // TODO make this proper
    l.replace('.', "_dot_")
        .replace('/', "_slash_")
        // `[]` must be replaced before the individual brackets below,
        // otherwise it would become `_left_square__right_square_`.
        .replace("[]", "_slice_")
        .replace(',', "_comma_")
        .replace('(', "_left_parens_")
        .replace(')', "_right_parens_")
        .replace('[', "_left_square_")
        .replace(']', "_right_square_")
        .replace('{', "_left_brace_")
        .replace('}', "_right_brace_")
        .replace(' ', "_space_")
        .replace('\'', "_quote_")
        .replace('*', "_deref_")
}

================================================
FILE: number/Cargo.toml
================================================
[package]
name = "powdr-number"
description = "powdr finite field definitions"
version = { workspace = true }
edition = { workspace = true }
license = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }

[dependencies]
ark-bn254 = { version = "0.4.0", default-features = false, features = [
    "scalar_field",
] }
ark-ff = "0.4.2"
ark-serialize = "0.4.2"
p3-baby-bear = { git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf28e7359dd2c577447886463e6124f0" }
p3-koala-bear = 
{ git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf28e7359dd2c577447886463e6124f0" } p3-mersenne-31 = { git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf28e7359dd2c577447886463e6124f0" } p3-field = { git = "https://github.com/plonky3/Plonky3.git", rev = "2192432ddf28e7359dd2c577447886463e6124f0" } num-bigint = { version = "0.4.3", features = ["serde"] } num-traits.workspace = true csv = "1.3" serde = { version = "1.0", default-features = false, features = [ "alloc", "derive", "rc", ] } serde_with = "3.6.1" schemars = { version = "0.8.16", features = ["preserve_order"] } ibig = { version = "0.3.6", features = ["serde"] } serde_cbor.workspace = true derive_more.workspace = true [dev-dependencies] test-log.workspace = true env_logger.workspace = true [package.metadata.cargo-udeps.ignore] development = ["env_logger"] [lints] workspace = true [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: number/src/baby_bear.rs ================================================ use p3_baby_bear::BabyBear; use crate::powdr_field_plonky3; powdr_field_plonky3!(BabyBearField, BabyBear); #[cfg(test)] mod test { use crate::traits::int_from_hex_str; use test_log::test; use super::*; #[test] fn bitwise() { let n = int_from_hex_str::("00ff00ff"); let p = int_from_hex_str::("f00ff00f"); let not_n = int_from_hex_str::("ff00ff00"); let n_shr_4 = int_from_hex_str::("000ff00f"); let n_shl_4 = int_from_hex_str::("0ff00ff0"); let n_or_p = int_from_hex_str::("f0fff0ff"); let n_and_p = int_from_hex_str::("000f000f"); let n_xor_p = int_from_hex_str::("f0f0f0f0"); assert_eq!(n.not().not(), n); assert_eq!(n.not(), not_n); assert_eq!(n >> 4, n_shr_4); assert_eq!(n << 4, n_shl_4); assert_eq!(n & p, n_and_p); assert_eq!(n | p, n_or_p); assert_eq!(n ^ p, n_xor_p); } #[test] fn zero_one() { let x = BabyBearField::ZERO; assert_eq!(x, BabyBearField::zero()); assert_eq!(x.to_canonical_u32(), 0); let 
y = BabyBearField::ONE; assert_eq!(y, BabyBearField::one()); assert_eq!(y.to_canonical_u32(), 1); let z = x + y + y; assert_eq!(z.to_canonical_u32(), 2); } #[test] fn lower_half() { let x = BabyBearField::from(0); assert!(x.is_in_lower_half()); assert!(!(x - 1.into()).is_in_lower_half()); let y = BabyBearField::from_str_radix("3c000000", 16).unwrap(); assert!(y.is_in_lower_half()); assert!(!(y + 1.into()).is_in_lower_half()); } #[test] #[should_panic] fn integer_div_by_zero() { let _ = BabyBearField::from(1).to_arbitrary_integer() / BabyBearField::from(0).to_arbitrary_integer(); } #[test] #[should_panic] fn div_by_zero() { let _ = BabyBearField::from(1) / BabyBearField::from(0); } #[test] fn to_signed_integer() { let values = [ i16::MIN as i64, i16::MIN as i64 + 1, i16::MIN as i64 + 4242, -0x6faa21, -3456, -1, 0, 0x6faa21, 1, 3456, i16::MAX as i64 - 4242, i16::MAX as i64 - 1, i16::MAX as i64, ]; for &value in &values { let field_value = BabyBearField::from(value); let signed_integer_value = field_value.to_signed_integer(); assert_eq!(signed_integer_value, value.into()); } } } ================================================ FILE: number/src/bn254.rs ================================================ use ark_bn254::Fr; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; powdr_field!(Bn254Field, Fr); #[cfg(test)] mod tests { use std::ops::*; use super::Bn254Field; use crate::{traits::int_from_hex_str, FieldElement}; use test_log::test; #[test] fn bitwise() { let n = int_from_hex_str::( "00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff", ); let p = int_from_hex_str::( "000ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00f", ); let not_n = int_from_hex_str::( "ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00", ); let n_shr_4 = int_from_hex_str::( "000ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00f", ); let n_shl_4 = int_from_hex_str::( "0ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff00ff0", 
); let n_or_p = int_from_hex_str::( "00fff0fff0fff0fff0fff0fff0fff0fff0fff0fff0fff0fff0fff0fff0fff0ff", ); let n_and_p = int_from_hex_str::( "000f000f000f000f000f000f000f000f000f000f000f000f000f000f000f000f", ); let n_xor_p = int_from_hex_str::( "00f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0", ); assert_eq!(n.not().not(), n); assert_eq!(n.not(), not_n); assert_eq!(n >> 4, n_shr_4); assert_eq!(n << 4, n_shl_4); assert_eq!(n & p, n_and_p); assert_eq!(n | p, n_or_p); assert_eq!(n ^ p, n_xor_p); } #[test] fn minus_one() { let minus_one = Bn254Field::from(0) - Bn254Field::from(1); assert_eq!( minus_one.to_arbitrary_integer(), crate::BigUint::from_str_radix( "21888242871839275222246405745257275088548364400416034343698204186575808495616", 10 ) .unwrap() ); } #[test] fn format() { let one = Bn254Field::from(1); assert_eq!(format!("{one:x}"), "1"); let minus_one = Bn254Field::from(0) - Bn254Field::from(1); assert_eq!( format!("{minus_one:x}"), "30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000000" ); } #[test] #[should_panic] fn integer_div_by_zero() { let _ = Bn254Field::from(1).to_arbitrary_integer() / Bn254Field::from(0).to_arbitrary_integer(); } #[test] #[should_panic] fn div_by_zero() { let _ = Bn254Field::from(1) / Bn254Field::from(0); } #[test] fn to_signed_integer() { let values = [ i32::MIN as i64, i32::MIN as i64 + 1, i32::MIN as i64 + 4242, -0x6faa2185, -3456, -1, 0, 0x6faa2185, 1, 3456, i32::MAX as i64 - 4242, i32::MAX as i64 - 1, i32::MAX as i64, ]; for &value in &values { let field_value = Bn254Field::from(value); let signed_integer_value = field_value.to_signed_integer(); assert_eq!(signed_integer_value, value.into()); } } } ================================================ FILE: number/src/expression_convertible.rs ================================================ use std::ops::{Add, Mul, Neg, Sub}; use crate::FieldElement; pub trait ExpressionConvertible { /// Converts `self` into a structure that supports algebraic operations. 
/// /// Fails in case a non-algebraic operation is used. /// /// The `try_to_number` function is used to check if some conversions can be simplified. /// /// This or `to_expression` must be implemented. fn try_to_expression< E: Add + Sub + Mul + Neg, >( &self, number_converter: &impl Fn(&T) -> E, var_converter: &impl Fn(&V) -> E, _try_to_number: &impl Fn(&E) -> Option, ) -> Option { Some(self.to_expression(number_converter, var_converter)) } /// Converts `self` into a structure that supports algebraic operations. /// /// This or `try_to_expression` must be implemented. fn to_expression< E: Add + Sub + Mul + Neg, >( &self, number_converter: &impl Fn(&T) -> E, var_converter: &impl Fn(&V) -> E, ) -> E { self.try_to_expression(number_converter, var_converter, &|_| unreachable!()) .unwrap() } } impl ExpressionConvertible for T { fn to_expression< E: Add + Sub + Mul + Neg, >( &self, number_converter: &impl Fn(&T) -> E, _var_converter: &impl Fn(&V) -> E, ) -> E { number_converter(self) } } ================================================ FILE: number/src/goldilocks.rs ================================================ use std::fmt::LowerHex; use std::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Not, Sub, SubAssign}; use std::str::FromStr; use ark_ff::{One, Zero}; use num_traits::{ConstOne, ConstZero}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use core::fmt::{self, Debug, Formatter}; use core::hash::Hash; #[cfg(target_arch = "x86_64")] use core::hint::unreachable_unchecked; use crate::{BigUint, FieldElement, KnownField, LargeInt}; // This implementation is adapted from plonky2. The main change is that we ensure that the stored // value is always less than the field modulus, since we do conversions from and to canonical // integers all the time. 
const EPSILON: u64 = (1 << 32) - 1; #[derive( Clone, Copy, PartialEq, Eq, Debug, Default, PartialOrd, Ord, Hash, Serialize, Deserialize, JsonSchema, derive_more::Display, )] #[repr(transparent)] pub struct GoldilocksField(u64); impl GoldilocksField { const ORDER: u64 = 0xFFFFFFFF00000001; /// Returns the inverse of the field element, using Fermat's little theorem. /// The inverse of `a` is computed as `a^(p-2)`, where `p` is the prime order of the field. /// /// Mathematically, this is equivalent to: /// $a^(p-1) = 1 (mod p)$ /// $a^(p-2) * a = 1 (mod p)$ /// Therefore $a^(p-2) = a^-1 (mod p)$ /// /// The following code has been adapted from winterfell/math/src/field/f64/mod.rs /// located at . fn try_inverse(&self) -> Option { if self.is_zero() { return None; } // compute base^(P - 2) using 72 multiplications // The exponent P - 2 is represented in binary as: // 0b1111111111111111111111111111111011111111111111111111111111111111 // compute base^11 let t2 = self.square() * *self; // compute base^111 let t3 = t2.square() * *self; // compute base^111111 (6 ones) // repeatedly square t3 3 times and multiply by t3 let t6 = exp_acc::<3>(t3, t3); // compute base^111111111111 (12 ones) // repeatedly square t6 6 times and multiply by t6 let t12 = exp_acc::<6>(t6, t6); // compute base^111111111111111111111111 (24 ones) // repeatedly square t12 12 times and multiply by t12 let t24 = exp_acc::<12>(t12, t12); // compute base^1111111111111111111111111111111 (31 ones) // repeatedly square t24 6 times and multiply by t6 first. 
then square t30 and // multiply by base let t30 = exp_acc::<6>(t24, t6); let t31 = t30.square() * *self; // compute base^111111111111111111111111111111101111111111111111111111111111111 // repeatedly square t31 32 times and multiply by t31 let t63 = exp_acc::<32>(t31, t31); // compute base^1111111111111111111111111111111011111111111111111111111111111111 Some(t63.square() * *self) } fn square(&self) -> Self { *self * *self } fn exp_power_of_2(&self, power_log: usize) -> Self { let mut res = *self; for _ in 0..power_log { res = res.square(); } res } #[inline(always)] fn from_canonical_u64(n: u64) -> Self { debug_assert!(n < Self::ORDER); Self(n) } #[inline] fn from_noncanonical_i64(n: i64) -> Self { Self::from_canonical_u64(if n < 0 { // If n < 0, then this is guaranteed to overflow since // both arguments have their high bit set, so the result // is in the canonical range. Self::ORDER.wrapping_add(n as u64) } else { n as u64 }) } #[inline] fn to_canonical_u64(self) -> u64 { self.0 } } #[inline] fn wrap(x: u64) -> u64 { if x >= GoldilocksField::ORDER { x - GoldilocksField::ORDER } else { x } } impl Neg for GoldilocksField { type Output = Self; #[inline] fn neg(self) -> Self { if self.is_zero() { Self::ZERO } else { Self(Self::ORDER - self.to_canonical_u64()) } } } impl Add for GoldilocksField { type Output = Self; #[inline] #[allow(clippy::suspicious_arithmetic_impl)] fn add(self, rhs: Self) -> Self { let (sum, over) = self.0.overflowing_add(rhs.0); let (sum, over) = sum.overflowing_add((over as u64) * EPSILON); debug_assert!(!over); Self(wrap(sum)) } } impl AddAssign for GoldilocksField { #[inline] fn add_assign(&mut self, rhs: Self) { *self = *self + rhs } } impl Sub for GoldilocksField { type Output = Self; #[inline] #[allow(clippy::suspicious_arithmetic_impl)] fn sub(self, rhs: Self) -> Self { let (diff, under) = self.0.overflowing_sub(rhs.0); let (diff, under) = diff.overflowing_sub((under as u64) * EPSILON); debug_assert!(!under); Self(wrap(diff)) } } impl 
SubAssign for GoldilocksField { #[inline] fn sub_assign(&mut self, rhs: Self) { *self = *self - rhs } } impl Mul for GoldilocksField { type Output = Self; fn mul(self, rhs: Self) -> Self { reduce128((self.0 as u128) * (rhs.0 as u128)) } } impl MulAssign for GoldilocksField { fn mul_assign(&mut self, rhs: Self) { *self = *self * rhs } } impl Div for GoldilocksField { type Output = Self; #[allow(clippy::suspicious_arithmetic_impl)] fn div(self, rhs: Self) -> Self::Output { self * rhs.try_inverse().unwrap() } } /// Fast addition modulo ORDER for x86-64. /// This function is marked unsafe for the following reasons: /// - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001. /// - It is only faster in some circumstances. In particular, on x86 it overwrites both inputs in /// the registers, so its use is not recommended when either input will be used again. #[inline(always)] #[cfg(target_arch = "x86_64")] unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 { let res_wrapped: u64; let adjustment: u64; core::arch::asm!( "add {0}, {1}", // Trick. The carry flag is set iff the addition overflowed. // sbb x, y does x := x - y - CF. In our case, x and y are both {1:e}, so it simply does // {1:e} := 0xffffffff on overflow and {1:e} := 0 otherwise. {1:e} is the low 32 bits of // {1}; the high 32-bits are zeroed on write. In the end, we end up with 0xffffffff in {1} // on overflow; this happens be EPSILON. // Note that the CPU does not realize that the result of sbb x, x does not actually depend // on x. We must write the result to a register that we know to be ready. We have a // dependency on {1} anyway, so let's use it. "sbb {1:e}, {1:e}", inlateout(reg) x => res_wrapped, inlateout(reg) y => adjustment, options(pure, nomem, nostack), ); assume(x != 0 || (res_wrapped == y && adjustment == 0)); assume(y != 0 || (res_wrapped == x && adjustment == 0)); // Add EPSILON == subtract ORDER. 
// Cannot overflow unless the assumption if x + y < 2**64 + ORDER is incorrect. res_wrapped + adjustment } #[inline(always)] #[cfg(not(target_arch = "x86_64"))] const unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 { let (res_wrapped, carry) = x.overflowing_add(y); // Below cannot overflow unless the assumption if x + y < 2**64 + ORDER is incorrect. res_wrapped + EPSILON * (carry as u64) } /// Reduces to a 64-bit value. The result is in canonical form. #[inline] fn reduce128(x: u128) -> GoldilocksField { let (x_lo, x_hi) = split(x); // This is a no-op let x_hi_hi = x_hi >> 32; let x_hi_lo = x_hi & EPSILON; let (mut t0, borrow) = x_lo.overflowing_sub(x_hi_hi); if borrow { branch_hint(); // A borrow is exceedingly rare. It is faster to branch. t0 -= EPSILON; // Cannot underflow. } let t1 = x_hi_lo * EPSILON; let t2 = unsafe { add_no_canonicalize_trashing_input(t0, t1) }; GoldilocksField(wrap(t2)) } /// Squares the base N number of times and multiplies the result by the tail value. #[inline(always)] fn exp_acc(base: GoldilocksField, tail: GoldilocksField) -> GoldilocksField { base.exp_power_of_2(N) * tail } #[inline] const fn split(x: u128) -> (u64, u64) { (x as u64, (x >> 64) as u64) } #[inline(always)] #[cfg(target_arch = "x86_64")] pub fn assume(p: bool) { debug_assert!(p); if !p { unsafe { unreachable_unchecked(); } } } /// Try to force Rust to emit a branch. Example: /// if x > 2 { /// y = foo(); /// branch_hint(); /// } else { /// y = bar(); /// } /// This function has no semantics. It is a hint only. #[inline(always)] pub fn branch_hint() { // NOTE: These are the currently supported assembly architectures. See the // [nightly reference](https://doc.rust-lang.org/nightly/reference/inline-assembly.html) for // the most up-to-date list. 
#[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "riscv32",
        target_arch = "riscv64",
        target_arch = "x86",
        target_arch = "x86_64",
    ))]
    unsafe {
        core::arch::asm!("", options(nomem, nostack, preserves_flags));
    }
}

// NOTE(review): stripped `<...>` generic arguments in this section were restored
// (`Vec<u8>`, `Result<Self, String>`, `Option<Self>`, `Option<KnownField>`, and the
// concrete `From<...>` source types); verify against the upstream file.
impl FieldElement for GoldilocksField {
    type Integer = GLLargeInt;

    const BITS: u32 = 64;

    fn to_degree(&self) -> crate::DegreeType {
        self.to_canonical_u64()
    }

    fn to_integer(&self) -> Self::Integer {
        self.to_canonical_u64().into()
    }

    #[inline]
    fn modulus() -> Self::Integer {
        Self::ORDER.into()
    }

    // Square-and-multiply exponentiation.
    fn pow(self, exp: Self::Integer) -> Self {
        let mut exp = exp.0;
        if exp == 0 {
            return 1.into();
        } else if exp == 1 {
            return self;
        }
        let mut x = self;
        let mut r: Self = 1.into();
        while exp >= 2 {
            if exp & 1 != 0 {
                r *= x;
            }
            x = x.square();
            exp >>= 1;
        }
        r * x
    }

    fn to_bytes_le(&self) -> Vec<u8> {
        self.to_canonical_u64().to_le_bytes().to_vec()
    }

    fn from_bytes_le(bytes: &[u8]) -> Self {
        wrap(u64::try_from(BigUint::from_le_bytes(bytes)).unwrap()).into()
    }

    fn from_str_radix(s: &str, radix: u32) -> Result<Self, String> {
        let n = u64::from_str_radix(s, radix).map_err(|e| e.to_string())?;
        if n < Self::ORDER {
            Ok(Self::from_canonical_u64(n))
        } else {
            Err(format!("Number \"{s}\" too large for Goldilocks field."))
        }
    }

    fn checked_from(value: ibig::UBig) -> Option<Self> {
        if value < Self::modulus().to_arbitrary_integer() {
            Some(u64::try_from(value).unwrap().into())
        } else {
            None
        }
    }

    fn is_in_lower_half(&self) -> bool {
        self.to_canonical_u64() <= (Self::ORDER - 1) / 2
    }

    fn known_field() -> Option<KnownField> {
        Some(KnownField::GoldilocksField)
    }

    fn has_direct_repr() -> bool {
        true
    }
}

impl LowerHex for GoldilocksField {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        LowerHex::fmt(&self.to_canonical_u64(), f)
    }
}

impl From<bool> for GoldilocksField {
    fn from(b: bool) -> Self {
        Self(b as u64)
    }
}

impl From<i64> for GoldilocksField {
    fn from(n: i64) -> Self {
        Self::from_noncanonical_i64(n)
    }
}

impl From<i32> for GoldilocksField {
    fn from(n: i32) -> Self {
        From::<i64>::from(n as i64)
    }
}

impl From<u32> for GoldilocksField {
    fn from(n: u32) -> Self {
        Self::from_canonical_u64(n as u64)
    }
}

impl From<u64> for GoldilocksField {
    #[inline]
    fn from(n: u64) -> Self {
        Self(wrap(n))
    }
}

impl From<crate::BigUint> for GoldilocksField {
    fn from(n: crate::BigUint) -> Self {
        u64::try_from(n).unwrap().into()
    }
}

impl From<GLLargeInt> for GoldilocksField {
    #[inline]
    fn from(n: GLLargeInt) -> Self {
        Self(wrap(n.0))
    }
}

impl ConstZero for GoldilocksField {
    const ZERO: Self = Self(0);
}

impl Zero for GoldilocksField {
    fn zero() -> Self {
        Self::ZERO
    }
    fn is_zero(&self) -> bool {
        self.0 == 0
    }
}

impl ConstOne for GoldilocksField {
    const ONE: Self = Self(1);
}

impl One for GoldilocksField {
    fn one() -> Self {
        Self::ONE
    }
    fn is_one(&self) -> bool {
        self.to_canonical_u64() == 1
    }
}

impl FromStr for GoldilocksField {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let n = BigUint::from_str(s).map_err(|e| e.to_string())?;
        let modulus = Self::modulus();
        if n >= modulus.to_arbitrary_integer() {
            Err(format!("Decimal number \"{s}\" too large for field."))
        } else {
            Ok(n.into())
        }
    }
}

// 64-bit integer companion type for GoldilocksField (the `Integer` associated type).
#[derive(
    Clone,
    Copy,
    PartialEq,
    Eq,
    Debug,
    Default,
    PartialOrd,
    Ord,
    Hash,
    derive_more::Display,
    Serialize,
    Deserialize,
    JsonSchema,
    derive_more::Mul,
    derive_more::Add,
    derive_more::Sub,
    derive_more::AddAssign,
    derive_more::SubAssign,
    derive_more::MulAssign,
    derive_more::Shr,
    derive_more::Shl,
    derive_more::BitAnd,
    derive_more::BitOr,
    derive_more::BitXor,
    derive_more::BitAndAssign,
    derive_more::BitOrAssign,
    derive_more::BitXorAssign,
)]
pub struct GLLargeInt(u64);

impl LargeInt for GLLargeInt {
    const MAX: Self = Self(u64::MAX);
    const NUM_BITS: usize = 64;

    fn to_arbitrary_integer(self) -> ibig::UBig {
        self.0.into()
    }

    fn num_bits(&self) -> usize {
        Self::NUM_BITS - self.0.leading_zeros() as usize
    }

    fn one() -> Self {
        Self(1)
    }

    fn is_one(&self) -> bool {
        self.0 == 1
    }

    fn try_into_u64(&self) -> Option<u64> {
        Some(self.0)
    }

    fn try_into_u32(&self) -> Option<u32> {
        u32::try_from(self.0).ok()
    }

    fn from_hex(s: &str) -> Self {
        Self(u64::from_str_radix(s, 16).unwrap())
    }
}

impl From<u32>
for GLLargeInt { fn from(value: u32) -> Self { Self(value as u64) } } impl From for GLLargeInt { fn from(value: u64) -> Self { Self(value) } } impl Zero for GLLargeInt { fn zero() -> Self { Self(0) } fn is_zero(&self) -> bool { self.0 == 0 } } impl ConstZero for GLLargeInt { const ZERO: Self = Self(0); } impl LowerHex for GLLargeInt { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { LowerHex::fmt(&self.0, f) } } impl Not for GLLargeInt { type Output = Self; fn not(self) -> Self::Output { Self(!self.0) } } #[cfg(test)] mod test { use crate::traits::int_from_hex_str; use test_log::test; use super::*; #[test] fn bitwise() { let n = int_from_hex_str::("00ff00ff00ff00ff"); let p = int_from_hex_str::("000ff00ff00ff00f"); let not_n = int_from_hex_str::("ff00ff00ff00ff00"); let n_shr_4 = int_from_hex_str::("000ff00ff00ff00f"); let n_shl_4 = int_from_hex_str::("0ff00ff00ff00ff0"); let n_or_p = int_from_hex_str::("00fff0fff0fff0ff"); let n_and_p = int_from_hex_str::("000f000f000f000f"); let n_xor_p = int_from_hex_str::("00f0f0f0f0f0f0f0"); assert_eq!(n.not().not(), n); assert_eq!(n.not(), not_n); assert_eq!(n >> 4, n_shr_4); assert_eq!(n << 4, n_shl_4); assert_eq!(n & p, n_and_p); assert_eq!(n | p, n_or_p); assert_eq!(n ^ p, n_xor_p); } #[test] fn lower_half() { let x = GoldilocksField::from(0); assert!(x.is_in_lower_half()); assert!(!(x - 1.into()).is_in_lower_half()); let y = GoldilocksField::from_str_radix("7fffffff80000000", 16).unwrap(); assert!(y.is_in_lower_half()); assert!(!(y + 1.into()).is_in_lower_half()); } #[test] fn from_str_radix_rejects_modulus() { // ORDER = 0xffffffff00000001, should be rejected assert!(GoldilocksField::from_str_radix("ffffffff00000001", 16).is_err()); } #[test] fn from_str_radix_accepts_order_minus_one() { // ORDER - 1 = 0xffffffff00000000, should be accepted and equal to the literal value let v = GoldilocksField::from_str_radix("ffffffff00000000", 16).unwrap(); assert_eq!(v.to_canonical_u64(), 0xffff_ffff_0000_0000); } #[test] 
#[should_panic] fn integer_div_by_zero() { let _ = GoldilocksField::from(1).to_arbitrary_integer() / GoldilocksField::from(0).to_arbitrary_integer(); } #[test] #[should_panic] fn div_by_zero() { let _ = GoldilocksField::from(1) / GoldilocksField::from(0); } } ================================================ FILE: number/src/koala_bear.rs ================================================ use p3_koala_bear::KoalaBear; use crate::powdr_field_plonky3; powdr_field_plonky3!(KoalaBearField, KoalaBear); #[cfg(test)] mod test { use crate::traits::int_from_hex_str; use test_log::test; use super::*; #[test] fn bitwise() { let n = int_from_hex_str::("00ff00ff"); let p = int_from_hex_str::("f00ff00f"); let not_n = int_from_hex_str::("ff00ff00"); let n_shr_4 = int_from_hex_str::("000ff00f"); let n_shl_4 = int_from_hex_str::("0ff00ff0"); let n_or_p = int_from_hex_str::("f0fff0ff"); let n_and_p = int_from_hex_str::("000f000f"); let n_xor_p = int_from_hex_str::("f0f0f0f0"); assert_eq!(n.not().not(), n); assert_eq!(n.not(), not_n); assert_eq!(n >> 4, n_shr_4); assert_eq!(n << 4, n_shl_4); assert_eq!(n & p, n_and_p); assert_eq!(n | p, n_or_p); assert_eq!(n ^ p, n_xor_p); } #[test] fn zero_one() { let x = KoalaBearField::ZERO; assert_eq!(x, KoalaBearField::zero()); assert_eq!(x.to_canonical_u32(), 0); let y = KoalaBearField::ONE; assert_eq!(y, KoalaBearField::one()); assert_eq!(y.to_canonical_u32(), 1); let z = x + y + y; assert_eq!(z.to_canonical_u32(), 2); } #[test] fn lower_half() { let x = KoalaBearField::from(0); assert!(x.is_in_lower_half()); assert!(!(x - 1.into()).is_in_lower_half()); let y = KoalaBearField::from_str_radix("3f800000", 16).unwrap(); assert!(y.is_in_lower_half()); assert!(!(y + 1.into()).is_in_lower_half()); } #[test] #[should_panic] fn integer_div_by_zero() { let _ = KoalaBearField::from(1).to_arbitrary_integer() / KoalaBearField::from(0).to_arbitrary_integer(); } #[test] #[should_panic] fn div_by_zero() { let _ = KoalaBearField::from(1) / 
KoalaBearField::from(0); } } ================================================ FILE: number/src/lib.rs ================================================ //! Numerical types used across powdr #[macro_use] mod macros; mod baby_bear; mod bn254; mod goldilocks; mod koala_bear; mod mersenne31; #[macro_use] mod plonky3_macros; mod expression_convertible; mod serialize; mod traits; pub use serialize::{ buffered_write_file, read_polys_csv_file, write_polys_csv_file, CsvRenderMode, ReadWrite, }; pub use baby_bear::BabyBearField; pub use bn254::Bn254Field; pub use expression_convertible::ExpressionConvertible; pub use goldilocks::GoldilocksField; pub use koala_bear::KoalaBearField; pub use mersenne31::Mersenne31Field; pub use traits::{FieldSize, KnownField}; pub use ibig::{IBig as BigInt, UBig as BigUint}; pub use traits::{FieldElement, LargeInt}; /// An arbitrary precision big integer, to be used as a last recourse /// The type of polynomial degrees and indices into columns. pub type DegreeType = u64; /// Returns Some(i) if n == 2**i and None otherwise. pub fn log2_exact(n: BigUint) -> Option { n.trailing_zeros() .filter(|zeros| n == (BigUint::from(1u32) << zeros)) } #[cfg(test)] mod test { use super::*; use test_log::test; #[test] fn log2_exact_function() { assert_eq!(log2_exact(0u32.into()), None); assert_eq!(log2_exact(1u32.into()), Some(0)); assert_eq!(log2_exact(2u32.into()), Some(1)); assert_eq!(log2_exact(4u32.into()), Some(2)); assert_eq!(log2_exact(BigUint::from(1u32) << 300), Some(300)); assert_eq!(log2_exact(17u32.into()), None); } } ================================================ FILE: number/src/macros.rs ================================================ macro_rules! 
powdr_field {
    ($name:ident, $ark_type:ty) => {
        use crate::{
            traits::{FieldElement, KnownField, LargeInt},
            BigUint, DegreeType,
        };
        use ark_ff::{BigInteger, Field, PrimeField};
        use num_traits::{ConstOne, ConstZero, One, Zero};
        use std::fmt;
        use std::ops::*;
        use std::str::FromStr;

        // NOTE(review): `<...>` generic arguments in this macro were stripped by the
        // extraction and restored below (`Shl<usize>`, `TryFrom<BigUint>`, the
        // `From<...>` source types, `Result<Self, String>`, `Vec<u8>`, `Option<...>`);
        // verify against the upstream file.

        #[derive(
            Clone, Copy, PartialEq, Eq, Debug, Default, PartialOrd, Ord, Hash, Serialize,
            Deserialize, JsonSchema,
        )]
        pub struct $name {
            #[serde(
                serialize_with = "crate::serialize::ark_se",
                deserialize_with = "crate::serialize::ark_de"
            )]
            #[schemars(skip)]
            value: $ark_type,
        }

        // Integer companion type wrapping the arkworks big-integer representation.
        #[derive(Clone, Copy, PartialEq, Eq, Debug, Default, PartialOrd, Ord, Hash)]
        pub struct LargeIntImpl {
            value: <$ark_type as PrimeField>::BigInt,
        }

        impl fmt::Display for LargeIntImpl {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(f, "{}", self.value)
            }
        }

        impl fmt::LowerHex for LargeIntImpl {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let val = self.to_arbitrary_integer();
                fmt::LowerHex::fmt(&val, f)
            }
        }

        impl LargeIntImpl {
            const fn new(value: <$ark_type as PrimeField>::BigInt) -> Self {
                Self { value }
            }
        }

        impl From<u32> for LargeIntImpl {
            fn from(v: u32) -> Self {
                Self::new(v.into())
            }
        }

        impl From<u64> for LargeIntImpl {
            fn from(v: u64) -> Self {
                Self::new(v.into())
            }
        }

        // Shifts go through BigUint to avoid limb-boundary handling.
        impl Shl<usize> for LargeIntImpl {
            type Output = Self;
            fn shl(self, other: usize) -> Self {
                (BigUint::from_le_bytes(&self.value.to_bytes_le()) << other)
                    .try_into()
                    .unwrap()
            }
        }

        impl Shr<usize> for LargeIntImpl {
            type Output = Self;
            fn shr(self, other: usize) -> Self {
                (BigUint::from_le_bytes(&self.value.to_bytes_le()) >> other)
                    .try_into()
                    .unwrap()
            }
        }

        // Bitwise operations are applied limb-by-limb.
        impl BitAnd for LargeIntImpl {
            type Output = Self;
            fn bitand(mut self, other: Self) -> Self {
                for (x, y) in self
                    .value
                    .as_mut()
                    .iter_mut()
                    .zip(other.value.as_ref().iter())
                {
                    *x &= y;
                }
                self
            }
        }

        impl BitOr for LargeIntImpl {
            type Output = Self;
            fn bitor(mut self, other: Self) -> Self {
                for (x, y) in self
                    .value
                    .as_mut()
                    .iter_mut()
                    .zip(other.value.as_ref().iter())
                {
                    *x |= y;
                }
                self
            }
        }

        impl BitXor for LargeIntImpl {
            type Output = Self;
            fn bitxor(mut self, other: Self) -> Self {
                for (x, y) in self
                    .value
                    .as_mut()
                    .iter_mut()
                    .zip(other.value.as_ref().iter())
                {
                    *x ^= y;
                }
                self
            }
        }

        impl BitOrAssign for LargeIntImpl {
            fn bitor_assign(&mut self, other: Self) {
                for (x, y) in self
                    .value
                    .as_mut()
                    .iter_mut()
                    .zip(other.value.as_ref().iter())
                {
                    *x |= y;
                }
            }
        }

        impl BitAndAssign for LargeIntImpl {
            fn bitand_assign(&mut self, other: Self) {
                for (x, y) in self
                    .value
                    .as_mut()
                    .iter_mut()
                    .zip(other.value.as_ref().iter())
                {
                    *x &= y;
                }
            }
        }

        impl Not for LargeIntImpl {
            type Output = Self;
            fn not(mut self) -> Self::Output {
                for limb in self.value.as_mut() {
                    *limb = !*limb;
                }
                self
            }
        }

        impl AddAssign for LargeIntImpl {
            fn add_assign(&mut self, other: Self) {
                self.value.add_with_carry(&other.value);
            }
        }

        impl Add for LargeIntImpl {
            type Output = Self;
            fn add(mut self, other: Self) -> Self {
                self.add_assign(other);
                self
            }
        }

        impl SubAssign for LargeIntImpl {
            fn sub_assign(&mut self, other: Self) {
                self.value.sub_with_borrow(&other.value);
            }
        }

        impl Sub for LargeIntImpl {
            type Output = Self;
            fn sub(mut self, other: Self) -> Self {
                self.sub_assign(other);
                self
            }
        }

        impl Zero for LargeIntImpl {
            #[inline]
            fn zero() -> Self {
                LargeIntImpl::new(<$ark_type as PrimeField>::BigInt::zero())
            }
            #[inline]
            fn is_zero(&self) -> bool {
                self.value.is_zero()
            }
        }

        impl TryFrom<BigUint> for LargeIntImpl {
            type Error = ();
            fn try_from(n: BigUint) -> Result<Self, Self::Error> {
                let n = num_bigint::BigUint::from_bytes_le(&n.to_le_bytes());
                Ok(Self {
                    value: <$ark_type as PrimeField>::BigInt::try_from(n)?,
                })
            }
        }

        impl LargeInt for LargeIntImpl {
            const MAX: Self = LargeIntImpl::new(<$ark_type as PrimeField>::BigInt::new(
                [u64::MAX; <$ark_type as PrimeField>::BigInt::NUM_LIMBS],
            ));
            const NUM_BITS: usize = <$ark_type as PrimeField>::BigInt::NUM_LIMBS * 64;
            #[inline]
            fn to_arbitrary_integer(self) -> BigUint {
                BigUint::from_le_bytes(&self.value.to_bytes_le())
            }
            fn num_bits(&self) -> usize {
                self.value.num_bits() as usize
            }
            #[inline]
            fn one() -> Self {
                LargeIntImpl::new(<$ark_type as PrimeField>::BigInt::one())
            }
            #[inline]
            fn is_one(&self) -> bool {
                self.value == <$ark_type as PrimeField>::BigInt::one()
            }
            fn try_into_u64(&self) -> Option<u64> {
                // Only the lowest limb may be populated.
                for v in self.value.0[1..].iter() {
                    if *v != 0 {
                        return None;
                    }
                }
                Some(self.value.0[0])
            }
            fn try_into_u32(&self) -> Option<u32> {
                let v = self.try_into_u64()?;
                v.try_into().ok()
            }
            fn from_hex(s: &str) -> Self {
                BigUint::from_str_radix(s, 16).unwrap().try_into().unwrap()
            }
        }

        impl ConstZero for LargeIntImpl {
            const ZERO: Self = LargeIntImpl::new(<$ark_type as PrimeField>::BigInt::zero());
        }

        impl From<BigUint> for $name {
            fn from(n: BigUint) -> Self {
                let n = num_bigint::BigUint::from_bytes_le(&n.to_le_bytes());
                Self { value: n.into() }
            }
        }

        impl From<LargeIntImpl> for $name {
            fn from(n: LargeIntImpl) -> Self {
                Self {
                    value: n.value.into(),
                }
            }
        }

        impl From<u32> for $name {
            fn from(n: u32) -> Self {
                (<$ark_type>::from(n)).into()
            }
        }

        impl From<u64> for $name {
            fn from(n: u64) -> Self {
                (<$ark_type>::from(n)).into()
            }
        }

        impl From<i32> for $name {
            fn from(n: i32) -> Self {
                (<$ark_type>::from(n)).into()
            }
        }

        impl From<i64> for $name {
            fn from(n: i64) -> Self {
                (<$ark_type>::from(n)).into()
            }
        }

        impl From<bool> for $name {
            fn from(n: bool) -> Self {
                (<$ark_type>::from(n)).into()
            }
        }

        impl FromStr for $name {
            type Err = String;
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                let n = BigUint::from_str(s).map_err(|e| e.to_string())?;
                let modulus = <$ark_type>::MODULUS.to_bytes_le();
                if n >= BigUint::from_le_bytes(&modulus) {
                    Err(format!("Decimal number \"{s}\" too large for field."))
                } else {
                    Ok(n.into())
                }
            }
        }

        impl fmt::LowerHex for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::LowerHex::fmt(&self.to_integer(), f)
            }
        }

        impl FieldElement for $name {
            type Integer = LargeIntImpl;
            const BITS: u32 = <$ark_type>::MODULUS_BIT_SIZE;

            fn known_field() -> Option<KnownField> {
                Some(KnownField::$name)
            }

            fn from_str_radix(s: &str, radix: u32) -> Result<Self, String> {
                let n = BigUint::from_str_radix(s, radix).map_err(|e| e.to_string())?;
                let modulus = <$ark_type>::MODULUS.to_bytes_le();
                if n >= BigUint::from_le_bytes(&modulus) {
                    Err(format!("Hexadecimal number \"0x{s}\" too large for field."))
                } else {
                    Ok(n.into())
                }
            }

            fn checked_from(value: BigUint) -> Option<Self> {
                let modulus = <$ark_type>::MODULUS.to_bytes_le();
                if value < BigUint::from_le_bytes(&modulus) {
                    Some(value.into())
                } else {
                    None
                }
            }

            fn to_degree(&self) -> DegreeType {
                let degree: BigUint = self.to_integer().to_arbitrary_integer();
                degree.try_into().unwrap()
            }

            fn to_integer(&self) -> Self::Integer {
                Self::Integer::new(self.value.into_bigint())
            }

            fn modulus() -> Self::Integer {
                Self::Integer::new(<$ark_type>::MODULUS)
            }

            fn pow(self, exponent: Self::Integer) -> Self {
                Self {
                    value: self.value.pow(exponent.value),
                }
            }

            fn to_bytes_le(&self) -> Vec<u8> {
                self.value.into_bigint().to_bytes_le()
            }

            fn from_bytes_le(bytes: &[u8]) -> Self {
                assert_eq!(
                    bytes.len(),
                    <$ark_type as PrimeField>::BigInt::NUM_LIMBS * 8,
                    "wrong number of bytes for field type"
                );
                let mut limbs = [0u64; <$ark_type as PrimeField>::BigInt::NUM_LIMBS];
                for (from, to) in bytes.chunks(8).zip(limbs.iter_mut()) {
                    *to = u64::from_le_bytes(from.try_into().unwrap());
                }
                Self {
                    value: <$ark_type as PrimeField>::BigInt::new(limbs).into(),
                }
            }

            fn is_in_lower_half(&self) -> bool {
                self.to_integer().value <= <$ark_type>::MODULUS_MINUS_ONE_DIV_TWO
            }

            fn has_direct_repr() -> bool {
                false
            }
        }

        impl From<$ark_type> for $name {
            #[inline]
            fn from(value: $ark_type) -> Self {
                Self { value }
            }
        }

        // Add
        impl std::ops::Add for $name {
            type Output = $name;
            #[inline]
            fn add(self, rhs: Self) -> Self::Output {
                (self.value + rhs.value).into()
            }
        }

        impl AddAssign for $name {
            fn add_assign(&mut self, rhs: Self) {
                self.value.add_assign(rhs.value);
            }
        }

        // Sub
        impl std::ops::Sub for $name {
            type Output = $name;
            fn sub(self, rhs: Self) -> Self::Output {
                (self.value - rhs.value).into()
            }
        }

        impl SubAssign for $name {
            fn sub_assign(&mut self, rhs: Self) {
                self.value.sub_assign(rhs.value);
            }
        }

        // Mul
        impl std::ops::Mul for $name {
            type Output = $name;
            fn mul(self, rhs: Self) -> Self::Output {
                (self.value * rhs.value).into()
            }
        }

        impl std::ops::MulAssign for $name {
            fn mul_assign(&mut self, rhs: Self) {
                self.value.mul_assign(rhs.value);
            }
        }

        // Div
        impl std::ops::Div for $name {
            type Output = $name;
            fn div(self, rhs: Self) -> Self::Output {
                (self.value / rhs.value).into()
            }
        }

        impl std::ops::Neg for $name {
            type Output = $name;
            #[inline]
            fn neg(self) -> Self::Output {
                (-self.value).into()
            }
        }

        impl Zero for $name {
            #[inline]
            fn zero() -> Self {
                <$ark_type>::ZERO.into()
            }
            #[inline]
            fn is_zero(&self) -> bool {
                self.value == <$ark_type>::ZERO
            }
        }

        impl ConstZero for $name {
            const ZERO: Self = Self {
                value: <$ark_type>::ZERO,
            };
        }

        impl One for $name {
            #[inline]
            fn one() -> Self {
                <$ark_type>::ONE.into()
            }
            #[inline]
            fn is_one(&self) -> bool {
                self.value == <$ark_type>::ONE
            }
        }

        impl ConstOne for $name {
            const ONE: Self = Self {
                value: <$ark_type>::ONE,
            };
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let value = self.to_integer().value;
                write!(f, "{value}")
            }
        }
    };
}

================================================ FILE: number/src/mersenne31.rs ================================================
use p3_mersenne_31::Mersenne31;

use crate::powdr_field_plonky3;

// Mersenne31Field is generated entirely by the plonky3 field macro.
powdr_field_plonky3!(Mersenne31Field, Mersenne31);

#[cfg(test)]
mod test {
    use crate::traits::int_from_hex_str;
    use test_log::test;

    use super::*;

    #[test]
    fn bitwise() {
        let n = int_from_hex_str::<Mersenne31Field>("00ff00ff");
        let p = int_from_hex_str::<Mersenne31Field>("f00ff00f");
        let not_n = int_from_hex_str::<Mersenne31Field>("ff00ff00");
        let n_shr_4 = int_from_hex_str::<Mersenne31Field>("000ff00f");
        let n_shl_4 = int_from_hex_str::<Mersenne31Field>("0ff00ff0");
        let n_or_p = int_from_hex_str::<Mersenne31Field>("f0fff0ff");
        let n_and_p = int_from_hex_str::<Mersenne31Field>("000f000f");
        let n_xor_p = int_from_hex_str::<Mersenne31Field>("f0f0f0f0");
        assert_eq!(n.not().not(), n);
        assert_eq!(n.not(), not_n);
        assert_eq!(n >> 4, n_shr_4);
        assert_eq!(n << 4, n_shl_4);
        assert_eq!(n & p, n_and_p);
        assert_eq!(n | p, n_or_p);
        assert_eq!(n ^
p, n_xor_p);
    }

    #[test]
    fn zero_one() {
        let x = Mersenne31Field::ZERO;
        assert_eq!(x, Mersenne31Field::zero());
        assert_eq!(x.to_canonical_u32(), 0);
        let y = Mersenne31Field::ONE;
        assert_eq!(y, Mersenne31Field::one());
        assert_eq!(y.to_canonical_u32(), 1);
        let z = x + y + y;
        assert_eq!(z.to_canonical_u32(), 2);
    }

    #[test]
    fn lower_half() {
        let x = Mersenne31Field::from(0);
        assert!(x.is_in_lower_half());
        assert!(!(x - 1.into()).is_in_lower_half());
        let y = Mersenne31Field::from_str_radix("3fffffff", 16).unwrap();
        assert!(y.is_in_lower_half());
        assert!(!(y + 1.into()).is_in_lower_half());
    }

    #[test]
    #[should_panic]
    fn integer_div_by_zero() {
        let _ = Mersenne31Field::from(1).to_arbitrary_integer()
            / Mersenne31Field::from(0).to_arbitrary_integer();
    }

    #[test]
    #[should_panic]
    fn div_by_zero() {
        let _ = Mersenne31Field::from(1) / Mersenne31Field::from(0);
    }
}

================================================ FILE: number/src/plonky3_macros.rs ================================================
#[macro_export]
macro_rules!
powdr_field_plonky3 {
    ($name:ident, $p3_type:ty) => {
        use schemars::{
            schema::{Schema, SchemaObject},
            JsonSchema,
        };
        use serde::{Deserialize, Serialize};

        use num_traits::{ConstOne, ConstZero};
        use std::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Not, Sub, SubAssign};
        use std::str::FromStr;
        use std::{collections::BTreeSet, fmt::LowerHex};

        use ark_ff::{One, Zero};

        use $crate::{BigUint, FieldElement, KnownField, LargeInt};

        use core::fmt::{self, Debug, Formatter};
        use core::hash::Hash;

        use p3_field::{AbstractField, Field, PrimeField32};

        // NOTE(review): `<...>` generic arguments in this macro were stripped by the
        // extraction and restored below (`Result<Self, String>`, `Option<Self>`,
        // `Vec<u8>`, the `From<...>` source types, `subschema_for::<u32>()`);
        // verify against the upstream file.

        #[derive(
            Debug,
            Copy,
            Clone,
            Default,
            Eq,
            Hash,
            PartialEq,
            Ord,
            PartialOrd,
            Serialize,
            Deserialize,
            derive_more::Display,
        )]
        pub struct $name($p3_type);

        impl $name {
            #[inline(always)]
            fn from_canonical_u32(n: u32) -> Self {
                Self(<$p3_type>::from_canonical_u32(n))
            }

            #[inline]
            fn to_canonical_u32(self) -> u32 {
                self.0.as_canonical_u32()
            }

            pub fn into_inner(self) -> $p3_type {
                self.0
            }

            pub fn from_inner(e: $p3_type) -> Self {
                Self(e)
            }
        }

        impl FieldElement for $name {
            type Integer = BBLargeInt;
            const BITS: u32 = 31;

            fn to_degree(&self) -> $crate::DegreeType {
                self.to_canonical_u32() as u64
            }

            fn to_integer(&self) -> Self::Integer {
                self.to_canonical_u32().into()
            }

            #[inline]
            fn modulus() -> Self::Integer {
                let p: u32 = <$p3_type>::order().try_into().unwrap();
                p.into()
            }

            fn pow(self, exp: Self::Integer) -> Self {
                Self(<$p3_type>::exp_u64_generic(
                    self.0,
                    exp.try_into_u64().unwrap(),
                ))
            }

            fn to_bytes_le(&self) -> Vec<u8> {
                self.to_canonical_u32().to_le_bytes().to_vec()
            }

            fn from_bytes_le(bytes: &[u8]) -> Self {
                let u = u32::from_le_bytes(bytes.try_into().unwrap());
                Self::from_canonical_u32(u)
            }

            fn from_str_radix(s: &str, radix: u32) -> Result<Self, String> {
                u32::from_str_radix(s, radix)
                    .map(Self::from_canonical_u32)
                    .map_err(|e| e.to_string())
            }

            fn checked_from(value: ibig::UBig) -> Option<Self> {
                if value < Self::modulus().to_arbitrary_integer() {
                    Some(u32::try_from(value).unwrap().into())
                } else {
                    None
                }
            }

            fn is_in_lower_half(&self) -> bool {
                let p: u32 = <$p3_type>::order().try_into().unwrap();
                self.to_canonical_u32() <= (p - 1) / 2
            }

            fn known_field() -> Option<$crate::KnownField> {
                Some(KnownField::$name)
            }

            fn has_direct_repr() -> bool {
                // No direct repr, because 'mod' is not always applied.
                false
            }
        }

        impl LowerHex for $name {
            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
                LowerHex::fmt(&self.to_canonical_u32(), f)
            }
        }

        impl From<bool> for $name {
            fn from(b: bool) -> Self {
                Self(<$p3_type>::from_bool(b))
            }
        }

        impl From<i64> for $name {
            fn from(n: i64) -> Self {
                Self::from(if n < 0 {
                    // If n < 0, then this is guaranteed to overflow since
                    // both arguments have their high bit set, so the result
                    // is in the canonical range.
                    Self::modulus()
                        .try_into_u64()
                        .unwrap()
                        .wrapping_add(n as u64)
                } else {
                    n as u64
                })
            }
        }

        impl From<i32> for $name {
            fn from(n: i32) -> Self {
                From::<i64>::from(n as i64)
            }
        }

        impl From<u32> for $name {
            fn from(n: u32) -> Self {
                Self(<$p3_type>::from_wrapped_u32(n))
            }
        }

        impl From<u64> for $name {
            #[inline]
            fn from(n: u64) -> Self {
                Self(<$p3_type>::from_wrapped_u64(n))
            }
        }

        impl From<$crate::BigUint> for $name {
            fn from(n: $crate::BigUint) -> Self {
                u64::try_from(n).unwrap().into()
            }
        }

        impl From<BBLargeInt> for $name {
            #[inline]
            fn from(n: BBLargeInt) -> Self {
                n.0.into()
            }
        }

        impl ConstZero for $name {
            const ZERO: Self = $name(<$p3_type>::new(0));
        }

        impl Zero for $name {
            fn zero() -> Self {
                Self(<$p3_type>::zero())
            }
            fn is_zero(&self) -> bool {
                self.0.is_zero()
            }
        }

        impl ConstOne for $name {
            const ONE: Self = $name(<$p3_type>::new(1));
        }

        impl One for $name {
            fn one() -> Self {
                Self(<$p3_type>::one())
            }
            fn is_one(&self) -> bool {
                self.to_canonical_u32() == 1
            }
        }

        impl FromStr for $name {
            type Err = String;
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                let n = BigUint::from_str(s).map_err(|e| e.to_string())?;
                let modulus = Self::modulus();
                if n >= modulus.to_arbitrary_integer() {
                    Err(format!("Decimal number \"{s}\" too large for field."))
                } else {
                    Ok(n.into())
                }
            }
        }

        impl Neg for $name {
            type Output = Self;
            #[inline]
            fn neg(self) -> Self {
                Self(self.0.neg())
            }
        }

        impl Add for $name {
            type Output = Self;
            #[inline]
            fn add(self, rhs: Self) -> Self {
                Self(self.0.add(rhs.0))
            }
        }

        impl AddAssign for $name {
            #[inline]
            fn add_assign(&mut self, rhs: Self) {
                self.0.add_assign(rhs.0)
            }
        }

        impl Sub for $name {
            type Output = Self;
            #[inline]
            fn sub(self, rhs: Self) -> Self {
                Self(self.0.sub(rhs.0))
            }
        }

        impl SubAssign for $name {
            #[inline]
            fn sub_assign(&mut self, rhs: Self) {
                self.0.sub_assign(rhs.0)
            }
        }

        impl Mul for $name {
            type Output = Self;
            fn mul(self, rhs: Self) -> Self {
                Self(self.0.mul(rhs.0))
            }
        }

        impl MulAssign for $name {
            fn mul_assign(&mut self, rhs: Self) {
                self.0.mul_assign(rhs.0)
            }
        }

        impl Div for $name {
            type Output = Self;
            fn div(self, rhs: Self) -> Self::Output {
                Self(self.0.div(rhs.0))
            }
        }

        impl JsonSchema for $name {
            fn schema_name() -> String {
                "$name".to_string()
            }

            fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> Schema {
                // Since $p3_type is just a wrapper around u32, use the schema for u32
                let u32_schema = gen.subschema_for::<u32>();
                SchemaObject {
                    // Define the schema for $name, where field is of type $p3_type (which is u32)
                    instance_type: Some(schemars::schema::InstanceType::Object.into()),
                    object: Some(Box::new(schemars::schema::ObjectValidation {
                        properties: vec![("field".to_string(), u32_schema)]
                            .into_iter()
                            .collect(),
                        required: BTreeSet::from(["field".to_string()]), // Convert Vec to BTreeSet
                        ..Default::default()
                    })),
                    ..Default::default()
                }
                .into()
            }
        }

        // 32-bit integer companion type (the `Integer` associated type).
        #[derive(
            Clone,
            Copy,
            PartialEq,
            Eq,
            Debug,
            Default,
            PartialOrd,
            Ord,
            Hash,
            derive_more::Display,
            Serialize,
            Deserialize,
            JsonSchema,
            derive_more::Mul,
            derive_more::Add,
            derive_more::Sub,
            derive_more::AddAssign,
            derive_more::SubAssign,
            derive_more::MulAssign,
            derive_more::Shr,
            derive_more::Shl,
            derive_more::BitAnd,
            derive_more::BitOr,
            derive_more::BitXor,
            derive_more::BitAndAssign,
            derive_more::BitOrAssign,
            derive_more::BitXorAssign,
        )]
        pub struct BBLargeInt(u32);

        impl LargeInt for BBLargeInt {
            const MAX: Self = Self(u32::MAX);
            const NUM_BITS: usize = 32;
            fn to_arbitrary_integer(self) -> ibig::UBig {
                self.0.into()
            }
            fn num_bits(&self) -> usize {
                Self::NUM_BITS - self.0.leading_zeros() as usize
            }
            fn one() -> Self {
                Self(1)
            }
            fn is_one(&self) -> bool {
                self.0 == 1
            }
            fn try_into_u64(&self) -> Option<u64> {
                Some(self.0 as u64)
            }
            fn try_into_u32(&self) -> Option<u32> {
                Some(self.0)
            }
            fn from_hex(s: &str) -> Self {
                Self(u32::from_str_radix(s, 16).unwrap())
            }
        }

        impl From<u32> for BBLargeInt {
            fn from(value: u32) -> Self {
                Self(value)
            }
        }

        impl From<u64> for BBLargeInt {
            fn from(value: u64) -> Self {
                Self(value as u32)
            }
        }

        impl Zero for BBLargeInt {
            fn zero() -> Self {
                Self(0)
            }
            fn is_zero(&self) -> bool {
                self.0 == 0
            }
        }

        impl ConstZero for BBLargeInt {
            const ZERO: Self = Self(0);
        }

        impl LowerHex for BBLargeInt {
            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
                LowerHex::fmt(&self.0, f)
            }
        }

        impl Not for BBLargeInt {
            type Output = Self;
            fn not(self) -> Self::Output {
                Self(!self.0)
            }
        }
    };
}

================================================ FILE: number/src/serialize.rs ================================================
use std::{
    fs::File,
    io::{self, BufWriter, Read, Write},
    path::Path,
};

use ark_serialize::{CanonicalDeserialize, CanonicalSerialize, Compress, Validate};
use csv::{Reader, Writer};
use serde::{de::DeserializeOwned, Serialize};
use serde_with::{DeserializeAs, SerializeAs};

use crate::FieldElement;

#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
pub enum CsvRenderMode {
    SignedBase10,
    UnsignedBase10,
    #[default]
    Hex,
}

const ROW_NAME: &str = "Row";

// NOTE(review): `<T: FieldElement>` was stripped by the extraction and restored here.
pub fn write_polys_csv_file<T: FieldElement>(
    file: impl Write,
    render_mode: CsvRenderMode,
    polys: &[(&String, &[T])],
) {
    let mut writer = Writer::from_writer(file);

    // Write headers, adding a "Row" column
    let mut headers = vec![ROW_NAME];
    headers.extend(polys.iter().map(|(name, _)| {
        assert!(*name != ROW_NAME);
        name.as_str()
    }));
    writer.write_record(&headers).unwrap();

    let max_len = polys.iter().map(|p| p.1.len()).max().unwrap();
    for row_index in 0..max_len {
        let mut row =
Vec::new();
// First cell of every row is the row index, matching the extra "Row" header column.
row.push(format!("{row_index}"));
for (_, values) in polys {
    let value = values
        .get(row_index)
        .map(|v| match render_mode {
            CsvRenderMode::SignedBase10 => format!("{v}"),
            CsvRenderMode::UnsignedBase10 => format!("{}", v.to_integer()),
            CsvRenderMode::Hex => format!("0x{:x}", v.to_integer()),
        })
        // Columns shorter than the longest column are padded with empty cells.
        .unwrap_or_default();
    row.push(value);
}
writer.write_record(&row).unwrap();
}
writer.flush().unwrap();
}

/// Reads a CSV file (in the format produced by `write_polys_csv_file`) back
/// into named columns of field elements. Panics on malformed CSV or values.
pub fn read_polys_csv_file(file: impl Read) -> Vec<(String, Vec)> {
    let mut reader = Reader::from_reader(file);
    let headers = reader.headers().unwrap();
    let mut polys = headers
        .iter()
        .map(|name| (name.to_string(), Vec::new()))
        .collect::>();
    for result in reader.records() {
        let record = result.unwrap();
        for (idx, value) in record.iter().enumerate() {
            // shorter polys/columns end in empty cells
            if value.trim().is_empty() {
                continue;
            }
            // Accepted cell formats mirror `CsvRenderMode`: "0x…" hex,
            // "-…" signed decimal, otherwise unsigned decimal.
            let value = if let Some(value) = value.strip_prefix("0x") {
                T::from_str_radix(value, 16).unwrap()
            } else if let Some(value) = value.strip_prefix('-') {
                -T::from_str(value).unwrap()
            } else {
                T::from_str(value).unwrap()
            };
            polys[idx].1.push(value);
        }
    }
    // Remove "Row" column, which was added by write_polys_csv_file()
    polys
        .into_iter()
        .filter(|(name, _)| name != ROW_NAME)
        .collect()
}

/// Runs `do_write` against a buffered writer for a freshly created file at
/// `path`, flushing the buffer before returning the closure's result.
pub fn buffered_write_file(
    path: &Path,
    do_write: impl FnOnce(&mut BufWriter) -> R,
) -> Result {
    let mut writer = BufWriter::new(File::create(path)?);
    let result = do_write(&mut writer);
    // Flush explicitly: `BufWriter`'s `Drop` silently swallows I/O errors.
    writer.flush()?;
    Ok(result)
}

/// CBOR (de)serialization of a value from a reader / to a file.
pub trait ReadWrite {
    /// Deserializes `Self` from a CBOR stream. Panics on malformed input.
    fn read(file: &mut impl Read) -> Self;
    /// Serializes `self` as CBOR to the file at `path`.
    fn write(&self, path: &Path) -> Result<(), serde_cbor::Error>;
}

impl ReadWrite for T {
    fn read(file: &mut impl Read) -> Self {
        serde_cbor::from_reader(file).unwrap()
    }

    fn write(&self, path: &Path) -> Result<(), serde_cbor::Error> {
        // Double `?`: the outer result is the file-creation/flush outcome from
        // `buffered_write_file`, the inner one the CBOR serialization outcome.
        buffered_write_file(path, |writer| serde_cbor::to_writer(writer, &self))??;
        Ok(())
    }
}

// Serde wrappers for serialize/deserialize

/// Serde serializer adapter for arkworks types implementing
/// `CanonicalSerialize`: serializes the compressed byte encoding.
pub fn ark_se(a: &A, s: S) -> Result
where
    S: serde::Serializer,
{
    let mut bytes = vec![];
a.serialize_with_mode(&mut bytes, Compress::Yes) .map_err(serde::ser::Error::custom)?; serde_with::Bytes::serialize_as(&bytes, s) } pub fn ark_de<'de, D, A: CanonicalDeserialize>(data: D) -> Result where D: serde::de::Deserializer<'de>, { let s: Vec = serde_with::Bytes::deserialize_as(data)?; let a = A::deserialize_with_mode(s.as_slice(), Compress::Yes, Validate::Yes); a.map_err(serde::de::Error::custom) } #[cfg(test)] mod tests { use crate::Bn254Field; use std::io::Cursor; use super::*; use test_log::test; fn test_polys() -> Vec<(String, Vec)> { vec![ ("a".to_string(), (0..16).map(Bn254Field::from).collect()), ("b".to_string(), (-16..0).map(Bn254Field::from).collect()), ] } #[test] fn write_read() { let mut buf: Vec = vec![]; let polys = test_polys(); serde_cbor::to_writer(&mut buf, &polys).unwrap(); let read_polys: Vec<(String, Vec)> = ReadWrite::read(&mut Cursor::new(buf)); assert_eq!(read_polys, polys); } #[test] fn write_read_csv() { let polys = test_polys() .into_iter() .map(|(name, values)| (name.to_string(), values)) .collect::>(); let polys_ref = polys .iter() .map(|(name, values)| (name, values.as_slice())) .collect::>(); for render_mode in &[ CsvRenderMode::SignedBase10, CsvRenderMode::UnsignedBase10, CsvRenderMode::Hex, ] { let mut buf: Vec = vec![]; write_polys_csv_file(&mut buf, *render_mode, &polys_ref); let read_polys = read_polys_csv_file::(&mut Cursor::new(buf)); assert_eq!(read_polys, polys); } } } ================================================ FILE: number/src/traits.rs ================================================ use std::{ fmt::{self, Display}, hash::Hash, ops::*, str::FromStr, }; use ibig::IBig; use num_traits::{ConstOne, ConstZero, One, Zero}; use schemars::JsonSchema; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use crate::{BigUint, DegreeType}; /// A fixed-width integer type pub trait LargeInt: Copy + Send + Sync + PartialEq + Eq + PartialOrd + Ord + Hash + From + BitAnd + BitOr + BitOrAssign + BitAndAssign + AddAssign 
+ Add
+ SubAssign
+ Sub
+ fmt::Display
+ fmt::Debug
+ Copy
+ Not
+ Shl
+ Shr
+ BitXor
+ Zero
+ ConstZero
+ fmt::LowerHex
{
    /// The largest value of this type, i.e. 2**NUM_BITS - 1
    const MAX: Self;
    /// Number of bits of this base type. Not to be confused with the number of bits
    /// of the field elements!
    const NUM_BITS: usize;
    /// Converts to an arbitrary-precision unsigned integer.
    fn to_arbitrary_integer(self) -> BigUint;
    /// Number of bits required to encode this particular number.
    fn num_bits(&self) -> usize;
    /// Returns the constant one.
    /// We are not implementing num_traits::One because it also requires multiplication.
    fn one() -> Self;
    /// Checks if the number is one.
    fn is_one(&self) -> bool;
    /// Tries to convert to u64.
    ///
    /// Returns None if value is out of u64 range.
    fn try_into_u64(&self) -> Option;
    /// Tries to convert to u32.
    ///
    /// Returns None if value is out of u32 range.
    fn try_into_u32(&self) -> Option;
    /// Creates a LargeInt from a hex string.
    /// Panics on failure - intended for testing.
    fn from_hex(s: &str) -> Self;
}

/// Coarse size class of a field, distinguishing fields that can hold the
/// product of two 32-bit numbers from those that cannot.
pub enum FieldSize {
    /// Fields that fit a 29-Bit number, but not much more.
    Small,
    /// Fields that at least fit a product of two 32-Bit numbers
    /// (Goldilocks and larger)
    Large,
}

/// The concrete fields this crate knows about.
#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum KnownField {
    BabyBearField,
    KoalaBearField,
    Mersenne31Field,
    GoldilocksField,
    Bn254Field,
}

impl KnownField {
    /// Classifies the field: BabyBear, KoalaBear and Mersenne31 are `Small`;
    /// Goldilocks and Bn254 are `Large`.
    pub fn field_size(&self) -> FieldSize {
        match self {
            KnownField::BabyBearField
            | KnownField::KoalaBearField
            | KnownField::Mersenne31Field => FieldSize::Small,
            KnownField::GoldilocksField | KnownField::Bn254Field => FieldSize::Large,
        }
    }
}

impl Display for KnownField {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            KnownField::BabyBearField => write!(f, "BabyBear"),
            KnownField::KoalaBearField => write!(f, "KoalaBear"),
            KnownField::Mersenne31Field => write!(f, "Mersenne31"),
            KnownField::GoldilocksField => write!(f, "Goldilocks"),
            KnownField::Bn254Field => write!(f, "Bn254"),
        }
    }
}

/// A field element
pub trait FieldElement:
    'static
    + Sync
    + Send
    + Default
    + Copy
    + PartialEq
    + Eq
    + Send
    + Sync
    + PartialOrd
    + Ord
    + Hash
    + Add
    + AddAssign
    + Sub
    + SubAssign
    + Mul
    + MulAssign
    + Div
    + Neg
    + Zero
    + ConstZero
    + ConstOne
    + One
    + fmt::Display
    + fmt::Debug
    + From
    + From
    + FromStr
    + From
    + From
    + From
    + From
    + From
    + fmt::LowerHex
    + Serialize
    + DeserializeOwned
    + JsonSchema
{
    /// The underlying fixed-width integer type
    type Integer: LargeInt;
    /// Number of bits required to represent elements of this field.
    const BITS: u32;

    /// Converts to a degree (row-count) value.
    fn to_degree(&self) -> DegreeType;

    /// Returns the canonical integer representation of this element.
    fn to_integer(&self) -> Self::Integer;

    /// Convenience: canonical integer representation as an arbitrary-precision integer.
    fn to_arbitrary_integer(&self) -> BigUint {
        self.to_integer().to_arbitrary_integer()
    }

    /// The field modulus as the underlying integer type.
    fn modulus() -> Self::Integer;

    /// Exponentiation by an integer exponent.
    fn pow(self, exponent: Self::Integer) -> Self;

    /// Little-endian byte encoding.
    fn to_bytes_le(&self) -> Vec;

    /// Inverse of `to_bytes_le`.
    fn from_bytes_le(bytes: &[u8]) -> Self;

    /// Parses a string in the given radix into a field element.
    fn from_str_radix(s: &str, radix: u32) -> Result;

    /// Only converts the value to a field element if it is less than the modulus.
    fn checked_from(value: BigUint) -> Option;

    /// Returns true if the value is in the "lower half" of the field,
    /// i.e.
the value <= (modulus() - 1) / 2 fn is_in_lower_half(&self) -> bool; /// If the field is a known field (as listed in the `KnownField` enum), returns the field variant. fn known_field() -> Option; /// Converts to a signed integer. /// /// Negative values are in relation to 0 in the field. /// Values up to the modulus / 2 are positive, values above are negative. fn to_signed_integer(&self) -> IBig { if self.is_in_lower_half() { self.to_arbitrary_integer().into() } else { IBig::from(self.to_arbitrary_integer()) - IBig::from(Self::modulus().to_arbitrary_integer()) } } /// Returns `true` if values of this type are directly stored as their integer /// value, i.e /// - montgomery representation is not used /// - values are always canonical (i.e. smaller than the modulus) /// - there are no additional fields and /// - `repr(transparent)` is used. /// /// In other words, the `to_integer` function can be implemented as /// a mem::transmute operation on pointers. fn has_direct_repr() -> bool; } #[cfg(test)] pub fn int_from_hex_str(s: &str) -> T::Integer { T::Integer::from_hex(s) } ================================================ FILE: openvm/Cargo.toml ================================================ [package] name = "powdr-openvm" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [features] default = [] aot = ["openvm-sdk/aot"] tco = ["openvm-sdk/tco"] metrics = ["openvm-sdk/metrics", "openvm-stark-backend/metrics", "openvm-stark-sdk/metrics"] cuda = ["openvm-sdk/cuda", "openvm-circuit-primitives/cuda", "dep:openvm-cuda-backend", "dep:openvm-cuda-common", "dep:openvm-cuda-builder"] test-utils = ["dep:pretty_assertions"] [dependencies] itertools.workspace = true openvm-circuit.workspace = true openvm-circuit-primitives.workspace = true openvm-instructions.workspace = true openvm-stark-backend.workspace = true openvm-stark-sdk.workspace = true powdr-autoprecompiles.workspace = true 
powdr-expression.workspace = true powdr-openvm-bus-interaction-handler.workspace = true openvm-circuit-derive.workspace = true openvm-circuit-primitives-derive.workspace = true openvm-sdk.workspace = true openvm-cuda-backend = { workspace = true, optional = true } openvm-cuda-common = { workspace = true, optional = true } openvm-transpiler.workspace = true openvm-native-circuit.workspace = true serde.workspace = true powdr-number.workspace = true derive_more.workspace = true powdr-constraint-solver.workspace = true tracing.workspace = true rustc-demangle = "0.1.25" metrics.workspace = true indicatif = "0.18.3" cfg-if = "1.0.4" powdr-riscv-elf.workspace = true pretty_assertions = { workspace = true, optional = true } [build-dependencies] openvm-cuda-builder = { workspace = true, optional = true } ================================================ FILE: openvm/build.rs ================================================ #[cfg(feature = "cuda")] use openvm_cuda_builder::{cuda_available, CudaBuilder}; fn main() { #[cfg(feature = "cuda")] { if !cuda_available() { return; // Skip CUDA compilation } let builder: CudaBuilder = CudaBuilder::new() .include_from_dep("DEP_CIRCUIT_PRIMITIVES_CUDA_INCLUDE") // Point to header file folder of crate with path `DEP_CIRCUIT_PRIMITIVES_CUDA_INCLUDE` .include_from_dep("DEP_CUDA_COMMON_INCLUDE") // The only dependency of our dependency `DEP_CIRCUIT_PRIMITIVES_CUDA_INCLUDE` .include("cuda/include") // Point to header file folder of this crate .watch("cuda") // Watch file changes of this crate for recompilation .library_name("powdr_gpu") // Library name of this crate; doesn't affect import name .files_from_glob("cuda/src/**/*.cu"); // Import all `.cu` files with zero or more nested sub-folders under `cuda/src`of this crate builder.emit_link_directives(); builder.build(); } } ================================================ FILE: openvm/cuda/src/apc_apply_bus.cu ================================================ #include #include #include 
#include "primitives/buffer_view.cuh"
#include "primitives/constants.h"
#include "primitives/trace_access.h"
#include "primitives/histogram.cuh"
#include "expr_eval.cuh"

extern "C" {
// Description of one bus interaction, shared with the Rust FFI side.
typedef struct {
    uint32_t bus_id;         // Bus id this interaction targets (matches periphery chip bus id)
    uint32_t num_args;       // Number of argument expressions for this interaction
    uint32_t args_index_off; // Starting index into the `ExprSpan` array for this interaction's
                             // args. Layout: [mult, arg0, arg1, ...]
} DevInteraction;
}

// Fixed number of bits for bitwise lookup
static constexpr uint32_t BITWISE_NUM_BITS = 8u;

// Applies bus interactions to periphery histograms for a batch of APC rows.
// One warp is assigned per interaction; within the warp, each lane walks a
// strided subset of the APC rows, and multiplicities are applied by repeated
// histogram increments.
__global__ void apc_apply_bus_kernel(
    // APC related
    const Fp* __restrict__ d_output, // APC trace (column-major)
    int num_apc_calls,               // number of APC calls (rows)
    // Interaction related
    const uint32_t* __restrict__ d_bytecode,           // bytecode for stack-machine expressions
    size_t bc_len,                                     // bytecode length (u32 words); NOTE(review): not read in this kernel body
    const DevInteraction* __restrict__ d_interactions, // interactions array
    size_t n_interactions,                             // number of interactions
    const ExprSpan* __restrict__ d_arg_spans,          // argument spans array
    size_t n_arg_spans,                                // number of arg spans; NOTE(review): not read in this kernel body
    // Variable range checker related
    uint32_t var_range_bus_id,         // variable range checker bus id
    uint32_t* __restrict__ d_var_hist, // variable range histogram buffer
    size_t var_num_bins,               // variable range histogram bin count
    // Tuple range checker related
    uint32_t tuple2_bus_id,               // 2-tuple range checker bus id
    uint32_t* __restrict__ d_tuple2_hist, // tuple2 histogram buffer
    uint32_t tuple2_sz0,                  // tuple2 size dim0
    uint32_t tuple2_sz1,                  // tuple2 size dim1
    // Bitwise related
    uint32_t bitwise_bus_id,              // bitwise lookup bus id
    uint32_t* __restrict__ d_bitwise_hist // bitwise lookup histogram buffer
) {
    // The warp this thread belongs to, as a CUDA warp is 32 threads
    const int warp = (threadIdx.x >> 5);
    // The thread's position within this warp
    const int lane = (threadIdx.x & 31);
    // The number of warps in a block
    const int warps_per_block = (blockDim.x >> 5);
    // Each bus interaction is processed by one warp
    for (int i = blockIdx.x * warps_per_block + warp; i < (int)n_interactions; i += gridDim.x * warps_per_block) {
        DevInteraction intr = d_interactions[i];
        // Each row is processed by one lane
        for (int r = lane; r < num_apc_calls; r += 32) {
            // multiplicity is stored as the first ExprSpan for this interaction
            ExprSpan mult_span = d_arg_spans[intr.args_index_off + 0];
            Fp mult = eval_arg(mult_span, d_bytecode, d_output, (size_t)r);
            // Evaluate args and apply based on bus id
            if (intr.bus_id == var_range_bus_id) {
                // expect [value, max_bits]
                ExprSpan s0 = d_arg_spans[intr.args_index_off + 1];
                ExprSpan s1 = d_arg_spans[intr.args_index_off + 2];
                Fp v_fp = eval_arg(s0, d_bytecode, d_output, (size_t)r);
                Fp b_fp = eval_arg(s1, d_bytecode, d_output, (size_t)r);
                // histogram `num_bins` and index calculation depend on the `VariableRangeCheckerChipGPU` implementation
                uint32_t value = v_fp.asUInt32();
                uint32_t max_bits = b_fp.asUInt32();
                lookup::Histogram hist(d_var_hist, (uint32_t)var_num_bins);
                // index = 2^max_bits + value — presumably the 2^max_bits term
                // selects the per-bit-width region of the chip's histogram;
                // confirm against `VariableRangeCheckerChipGPU`.
                uint32_t idx = (1u << max_bits) + value;
                // apply multiplicity by looping; warp-level dedup in Histogram minimizes contention
                for (uint32_t k = 0; k < (uint32_t)mult.asUInt32(); ++k) hist.add_count(idx);
            } else if (intr.bus_id == tuple2_bus_id) {
                // expect [v0, v1]
                ExprSpan s0 = d_arg_spans[intr.args_index_off + 1];
                ExprSpan s1 = d_arg_spans[intr.args_index_off + 2];
                Fp v0_fp = eval_arg(s0, d_bytecode, d_output, (size_t)r);
                Fp v1_fp = eval_arg(s1, d_bytecode, d_output, (size_t)r);
                // histogram `num_bins` and index calculation depend on the `RangeTupleCheckerChipGpu<2>` implementation
                uint32_t v0 = v0_fp.asUInt32();
                uint32_t v1 = v1_fp.asUInt32();
                lookup::Histogram hist(d_tuple2_hist, tuple2_sz0 * tuple2_sz1);
                // Row-major index into the (sz0 x sz1) tuple histogram.
                uint32_t idx = v0 * tuple2_sz1 + v1;
                for (uint32_t k = 0; k < (uint32_t)mult.asUInt32(); ++k) hist.add_count(idx);
            } else if (intr.bus_id == bitwise_bus_id) {
                // expect [x, y, x_xor_y, selector]; we only update histogram if selector==range(0) or xor(1)
                ExprSpan s0 = d_arg_spans[intr.args_index_off + 1];
                ExprSpan s1 = d_arg_spans[intr.args_index_off + 2];
                ExprSpan s2 = d_arg_spans[intr.args_index_off + 3];
                ExprSpan s3 = d_arg_spans[intr.args_index_off + 4];
                Fp x_fp = eval_arg(s0, d_bytecode, d_output, (size_t)r);
                Fp y_fp = eval_arg(s1, d_bytecode, d_output, (size_t)r);
                Fp xy_fp = eval_arg(s2, d_bytecode, d_output, (size_t)r);
                Fp sel_fp = eval_arg(s3, d_bytecode, d_output, (size_t)r);
                uint32_t x = x_fp.asUInt32();
                uint32_t y = y_fp.asUInt32();
                uint32_t xy = xy_fp.asUInt32();
                uint32_t selector = sel_fp.asUInt32();
                BitwiseOperationLookup bl(d_bitwise_hist, BITWISE_NUM_BITS);
                for (uint32_t k = 0; k < (uint32_t)mult.asUInt32(); ++k) {
                    if (selector == 0u) bl.add_range(x, y);
                    else if (selector == 1u) {
                        bl.add_xor(x, y); /* could assert xy correctness on device if needed */
                    } else {
                        assert(false && "Invalid selector");
                    }
                }
                // `x_xor_y` is evaluated but not checked on device; silence the unused warning.
                (void)xy;
            }
        }
    }
}

// ============================================================================================
// Host launcher wrapper — callable from Rust FFI or cudarc
// ============================================================================================
// Launches `apc_apply_bus_kernel` with one warp per interaction.
// Returns the CUDA launch error code (`cudaGetLastError`) as an int for the FFI caller.
extern "C" int _apc_apply_bus(
    // APC related
    const Fp* d_output, // APC trace (column-major), device pointer
    int num_apc_calls,  // number of APC calls (rows)
    // Interaction related
    const uint32_t* d_bytecode,           // bytecode buffer (device)
    size_t bytecode_len,                  // length of bytecode (u32 words)
    const DevInteraction* d_interactions, // interactions array (device)
    size_t n_interactions,                // number of interactions
    const ExprSpan* d_arg_spans,          // argument spans (device)
    size_t n_arg_spans,                   // number of arg spans
    // Variable range checker related
    uint32_t var_range_bus_id, // variable range checker bus id
    uint32_t* d_var_hist,      // variable range histogram (device)
    size_t var_num_bins,       // number of bins in variable range histogram
    // Tuple range checker related
    uint32_t tuple2_bus_id,  // 2-tuple range checker bus id
    uint32_t* d_tuple2_hist, // tuple2 histogram (device)
    uint32_t tuple2_sz0,     // tuple2 size dim0
    uint32_t tuple2_sz1,     // tuple2 size dim1
    // Bitwise related
    uint32_t bitwise_bus_id, // bitwise lookup bus id
    uint32_t* d_bitwise_hist // bitwise lookup histogram (device)
) {
    const int block_x = 256; // 8 warps
    const dim3 block(block_x, 1, 1);
    const unsigned warps_per_block = (unsigned)(block_x / 32);
    // Round up so every interaction is covered by a warp.
    size_t g_size = (n_interactions + (size_t)warps_per_block - 1) / (size_t)warps_per_block;
    unsigned g = (unsigned)g_size;
    if (g == 0u) g = 1u;
    const dim3 grid(g, 1, 1);
    // each warp processes an interaction
    apc_apply_bus_kernel<<>>(
        // APC related
        d_output, num_apc_calls,
        // Interaction related
        d_bytecode, bytecode_len, d_interactions, n_interactions, d_arg_spans, n_arg_spans,
        // Variable range checker related
        var_range_bus_id, d_var_hist, var_num_bins,
        // Tuple range checker related
        tuple2_bus_id, d_tuple2_hist, tuple2_sz0, tuple2_sz1,
        // Bitwise related
        bitwise_bus_id, d_bitwise_hist
    );
    return (int)cudaGetLastError();
}

================================================
FILE: openvm/cuda/src/apc_tracegen.cu
================================================
#include "primitives/buffer_view.cuh"
#include "primitives/constants.h"
#include "primitives/trace_access.h"
#include "expr_eval.cuh"

// ============================================================================================
// Types
// ============================================================================================
// Metadata for one original (pre-APC) AIR trace buffer.
struct OriginalAir {
    int width;          // number of columns
    int height;         // number of rows (Ha)
    const Fp* buffer;   // column-major base: col*height + row
    int row_block_size; // stride between used rows
};
// One copy instruction: source cell (air, col, row offset) -> destination APC column.
struct Subst {
    int air_index; // index into d_original_airs
    int col;       // source column within this AIR
    int row;       // base row offset within the row-block
    int apc_col;   // destination APC column
};
extern "C" {
// Per-derived-column specification, shared with the Rust FFI side.
typedef struct {
    uint64_t col_base; // precomputed destination base offset = apc_col_index * H
    ExprSpan span;     // expression span encoding this column's value
} DerivedExprSpec;
}

// ============================================================================================
// Kernel: each thread iterates rows and processes all substitutions.
// ============================================================================================
// Copies cells from the original AIR traces into the APC output trace; rows at
// or beyond `num_apc_calls` are zero-filled so every column is fully initialized.
__global__ void apc_tracegen_kernel(
    Fp* __restrict__ d_output,                       // column-major
    size_t H,                                        // height of the output
    const OriginalAir* __restrict__ d_original_airs, // metadata per AIR
    const Subst* __restrict__ d_subs,                // all substitutions
    size_t n_subs,                                   // number of substitutions
    int num_apc_calls                                // number of APC calls
) {
    const size_t total_threads = (size_t)gridDim.x * (size_t)blockDim.x;
    const size_t tid = (size_t)blockIdx.x * (size_t)blockDim.x + (size_t)threadIdx.x;
    // Grid-stride loop over output rows.
    for (size_t r = tid; r < H; r += total_threads) {
        const bool row_in_range = r < (size_t)num_apc_calls;
        for (size_t i = 0; i < n_subs; ++i) {
            const Subst sub = d_subs[i];
            const size_t dst_idx = (size_t)sub.apc_col * H + r;
            if (!row_in_range) {
                // Padding row: zero-fill the destination cell.
                d_output[dst_idx] = Fp(0);
                continue;
            }
            const size_t air_idx = (size_t)sub.air_index;
            const OriginalAir air = d_original_airs[air_idx];
            const Fp* __restrict__ src_base = air.buffer;
            const size_t src_col_base = (size_t)sub.col * (size_t)air.height;
            // Source row = base offset within the row-block + r strided by the block size.
            const size_t src_r = (size_t)sub.row + r * (size_t)air.row_block_size;
            d_output[dst_idx] = src_base[src_col_base + src_r];
        }
    }
}

// ============================================================================================
// Derived expressions: lane-per-row evaluator, sequential over derived columns per row
// ============================================================================================
// Evaluates each derived column's bytecode expression against the (already
// populated) APC trace and writes the result in place; padding rows are zeroed.
__global__ void apc_apply_derived_expr_kernel(
    Fp* __restrict__ d_output,                   // APC trace (column-major)
    size_t H,                                    // rows (height)
    int num_apc_calls,                           // number of valid rows
    const DerivedExprSpec* __restrict__ d_specs, // derived expression specs
    size_t n_cols,                               // number of derived columns
    const uint32_t* __restrict__ d_bytecode      // shared bytecode buffer
) {
    const size_t total_threads = (size_t)gridDim.x * (size_t)blockDim.x;
    const size_t tid = (size_t)blockIdx.x * (size_t)blockDim.x + (size_t)threadIdx.x;
    // Grid-stride loop over rows; derived columns are processed sequentially
    // per row so later expressions can read earlier columns of the same row.
    for (size_t r = tid; r < H; r += total_threads) {
        if (r < (size_t)num_apc_calls) {
            // Compute and write each derived column for this row
            for (size_t i = 0; i < n_cols; ++i) {
                const DerivedExprSpec spec = d_specs[i];
                const size_t col_base = (size_t)spec.col_base;
                const Fp v = eval_arg(spec.span, d_bytecode, d_output, r);
                d_output[col_base + r] = v;
            }
        } else {
            // Zero-fill non-APC rows
            for (size_t i = 0; i < n_cols; ++i) {
                const size_t col_base = (size_t)d_specs[i].col_base;
                d_output[col_base + r] = Fp(0);
            }
        }
    }
}

// ============================================================================================
// Host launcher wrappers — callable from Rust FFI or cudarc
// ============================================================================================
// Launches the derived-expression kernel (one thread per row, rounded up).
// Returns the CUDA launch error code as an int for the FFI caller.
extern "C" int _apc_apply_derived_expr(
    Fp* d_output,
    size_t H,
    int num_apc_calls,
    const DerivedExprSpec* d_specs,
    size_t n_cols,
    const uint32_t* d_bytecode
) {
    // Nothing to do without derived columns.
    if (n_cols == 0) return 0;
    const int block_x = 256; // more lanes to cover rows
    const dim3 block(block_x, 1, 1);
    unsigned g = (unsigned)((H + block_x - 1) / block_x);
    if (g == 0u) g = 1u;
    const dim3 grid(g, 1, 1);
    apc_apply_derived_expr_kernel<<>>(
        d_output, H, num_apc_calls, d_specs, n_cols, d_bytecode
    );
    return (int)cudaGetLastError();
}

// Launches the substitution-copy kernel over the APC output trace.
// Returns the CUDA launch error code as an int for the FFI caller.
extern "C" int _apc_tracegen(
    Fp* d_output,                       // [output_height * output_width], column-major
    size_t output_height,               // H_out
    const OriginalAir* d_original_airs, // device array of AIR metadata
    const Subst* d_subs,                // device array of all substitutions
    size_t n_subs,                      // number of substitutions
    int num_apc_calls                   // number of APC calls
) {
    assert((output_height & (output_height - 1)) == 0); // power-of-two height check
    const int block_x = 256;
    const dim3 block(block_x, 1, 1);
    unsigned g = (unsigned)((output_height + block_x - 1) / block_x);
    if (g == 0u) g = 1u;
    const dim3 grid(g, 1, 1);
    apc_tracegen_kernel<<>>(
        d_output, output_height, d_original_airs, d_subs, n_subs, num_apc_calls
    );
    return (int)cudaGetLastError();
}

================================================
FILE: openvm/cuda/src/expr_eval.cuh
================================================
#pragma once
#include
#include

// This header provides a tiny stack-machine evaluator for algebraic expressions
// used by both bus and derived-expression evaluation kernels.
//
// It assumes the including translation unit has included the definitions of `Fp`,
// `inv`, and any required primitives.

// Opcodes of the expression bytecode. Opcodes with an immediate operand
// consume the following u32 word of the bytecode stream.
enum OpCode : uint32_t {
    OP_PUSH_APC = 0,    // Push the APC value onto the stack. Must be followed by the index of the value in the APC device buffer.
    OP_PUSH_CONST = 1,  // Push a constant value onto the stack. Must be followed by the constant value.
    OP_ADD = 2,         // Add the top two values on the stack.
    OP_SUB = 3,         // Subtract the top two values on the stack.
    OP_MUL = 4,         // Multiply the top two values on the stack.
    OP_NEG = 5,         // Negate the top value on the stack.
    OP_INV_OR_ZERO = 6, // Invert the top value on the stack if it is not zero, otherwise pop and push zero.
};

// Maximum evaluation stack depth; overflow is caught by assert (debug builds).
static constexpr int STACK_CAPACITY = 16;

// Inline helpers to safely manipulate the evaluation stack
__device__ __forceinline__ void stack_push(Fp* stack, int& sp, Fp value) {
    assert(sp < STACK_CAPACITY && "Stack overflow");
    stack[sp++] = value;
}

__device__ __forceinline__ Fp stack_pop(Fp* stack, int& sp) {
    assert(sp > 0 && "Stack underflow");
    return stack[--sp];
}

// Evaluate expression encoded as u32 bytecode starting at `expr` for length `len` on a given APC row `r` of `apc_trace`.
__device__ __forceinline__ Fp eval_expr(const uint32_t* expr, uint32_t len, const Fp* __restrict__ apc_trace, size_t r) {
    Fp stack[STACK_CAPACITY];
    int sp = 0;
    for (uint32_t ip = 0; ip < len;) {
        const uint32_t op = expr[ip++];
        switch (op) {
            case OP_PUSH_APC: {
                // Immediate operand: base offset of the referenced APC column.
                const uint32_t base = expr[ip++];
                stack_push(stack, sp, apc_trace[base + r]);
                break;
            }
            case OP_PUSH_CONST: {
                // Immediate operand: constant value, lifted into the field via Fp(u).
                const uint32_t u = expr[ip++];
                stack_push(stack, sp, Fp(u));
                break;
            }
            case OP_ADD: {
                const Fp b = stack_pop(stack, sp);
                const Fp a = stack_pop(stack, sp);
                stack_push(stack, sp, a + b);
                break;
            }
            case OP_SUB: {
                // Operand order: second-from-top minus top.
                const Fp b = stack_pop(stack, sp);
                const Fp a = stack_pop(stack, sp);
                stack_push(stack, sp, a - b);
                break;
            }
            case OP_MUL: {
                const Fp b = stack_pop(stack, sp);
                const Fp a = stack_pop(stack, sp);
                stack_push(stack, sp, a * b);
                break;
            }
            case OP_NEG: {
                const Fp a = stack_pop(stack, sp);
                stack_push(stack, sp, -a);
                break;
            }
            case OP_INV_OR_ZERO: {
                // Field inverse, with 0 mapped to 0 (inv(0) is undefined).
                const Fp a = stack_pop(stack, sp);
                const Fp out = (a == Fp::zero()) ? Fp::zero() : inv(a);
                stack_push(stack, sp, out);
                break;
            }
            default: {
                assert(false && "Unknown opcode");
            }
        }
    }
    // A well-formed expression leaves exactly one value on the stack.
    assert(sp == 1);
    return stack[sp - 1];
}

// Span (offset, length) of a sub-expression within a shared bytecode buffer
struct ExprSpan {
    uint32_t off;
    uint32_t len;
};

// Evaluate an argument span from a shared bytecode buffer for APC row `r`
__device__ __forceinline__ Fp eval_arg(
    const ExprSpan& span,
    const uint32_t* __restrict__ d_bytecode,
    const Fp* __restrict__ apc_trace,
    size_t r
) {
    return eval_expr(d_bytecode + span.off, span.len, apc_trace, r);
}

================================================
FILE: openvm/metrics-viewer/CLAUDE.md
================================================
# Metrics Viewer

Single-page web app for visualizing proof metrics from OpenVM benchmarks.
This is a web port of the Python scripts [`basic_metrics.py`](../../openvm-riscv/scripts/basic_metrics.py) and [`plot_trace_cells.py`](../../openvm-riscv/scripts/plot_trace_cells.py), following the same pattern as the [autoprecompile-analyzer](../../autoprecompile-analyzer/index.html). The goal is to make benchmark results shareable via URL without needing a Python environment.

## Project Structure

```
index.html   # SPA with embedded JS/CSS (D3.js v7, Bootstrap 5.3)
spec.py      # Python reference implementation of metric computations (for auditing)
CLAUDE.md    # This file
```

## Data Format

Input can be either of these formats:

1. **Combined metrics JSON** — produced by `basic_metrics.py combine`. It maps run names to raw metrics objects:

```json
{
  "<run_name>": {
    "counter": [
      { "labels": [["group", "app_proof"], ["air_name", "SomeAir"], ["segment", "0"], ...], "metric": "cells", "value": "123456" },
      ...
    ],
    "gauge": [
      { "labels": [["group", "app_proof"], ...], "metric": "total_proof_time_ms", "value": "45678" },
      ...
    ]
  },
  "<another_run_name>": { ... }
}
```

2. **Raw metrics JSON** — a single experiment object with top-level `counter` and `gauge` keys:

```json
{ "counter": [ ... ], "gauge": [ ... ] }
```

If the top-level object has both `counter` and `gauge`, the viewer treats it as a raw metrics file and renders a single experiment. Otherwise it treats the object as combined metrics and validates each experiment entry.

Each entry in `counter` / `gauge` must have:

- `labels`: Array of `[key, value]` pairs.
- `metric`: Metric name string.
- `value`: String-encoded numeric value (or numeric in practice; the UI accepts both).

### OpenVM 1 Schema

Produced by OpenVM 1 (STARK-based prover with FRI).

**Label keys**: `group`, `air_name`, `air_id`, `segment`, `idx`, `trace_height_constraint`.

**Group values**: `app_proof`, `leaf_*` (e.g. `leaf_0`), `internal_*` (e.g. `internal_0`).
**Counter metrics** (with `group`): - `cells`, `rows`, `main_cols`, `prep_cols`, `perm_cols` — trace dimensions (per AIR/segment) - `total_cells`, `total_cells_used`, `main_cells_used` — cell counts including padding (per segment, no `air_name`) - `constraints`, `interactions` — per-AIR constraint/interaction counts (no `group`/`segment` labels) - `quotient_deg`, `fri.log_blowup`, `num_children` — FRI/quotient parameters - `threshold`, `weighted_sum`, `execute_metered_insns`, `execute_preflight_insns` **Gauge metrics** (timing, with `group`): - `total_proof_time_ms` — total time per group (app/leaf/internal) - `stark_prove_excluding_trace_time_ms` — STARK prover time minus trace generation - `trace_gen_time_ms`, `system_trace_gen_time_ms`, `single_trace_gen_time_ms` - `execute_preflight_time_ms`, `execute_metered_time_ms` - `main_trace_commit_time_ms`, `perm_trace_commit_time_ms` - `generate_perm_trace_time_ms`, `memory_to_vec_partition_time_ms` - `quotient_poly_compute_time_ms`, `quotient_poly_commit_time_ms`, `pcs_opening_time_ms` - `single_leaf_agg_time_ms`, `single_internal_agg_time_ms`, `agg_layer_time_ms` - `app_prove_time_ms`, `prove_segment_time_ms` - `total_apc_gen_time_ms`, `memory_finalize_time_ms`, `compute_user_public_values_proof_time_ms` - `dummy_proof_and_keygen_time_ms` ### OpenVM 2 Schema Produced by OpenVM 2 (uses GKR/LogUp-based prover with WHIR). **Label keys**: All V1 keys plus `air`, `module`, `phase`. **Group values**: `app_proof`, `leaf`, `compression`, `internal_for_leaf`, `internal_recursive.0`, `internal_recursive.1`, `internal_recursive.2`. 
Key differences from V1: - `compression` is a new proving phase (not present in V1) - `leaf` has no numeric suffix (V1 used `leaf_*`) - Internal groups split into `internal_for_leaf` and `internal_recursive.N` **Counter metrics**: - Same as V1: `cells`, `rows`, `main_cols`, `prep_cols`, `perm_cols`, `total_cells`, `constraints`, `interactions` - New: `constraint_deg` (replaces V1's `quotient_deg`) - Removed: `total_cells_used`, `main_cells_used`, `quotient_deg`, `fri.log_blowup`, `num_children` **Gauge metrics** — timing breakdown is hierarchical with `prover.*` prefix: - Top-level (same as V1): `total_proof_time_ms`, `stark_prove_excluding_trace_time_ms`, `trace_gen_time_ms`, `execute_preflight_time_ms`, `execute_metered_time_ms` - New `prover.*` sub-metrics: - `prover.main_trace_commit_time_ms` — trace commitment - `prover.rap_constraints_time_ms` — constraint evaluation (parent) - `prover.rap_constraints.logup_gkr_time_ms` — LogUp GKR - `prover.rap_constraints.logup_gkr.input_evals_time_ms` - `prover.rap_constraints.round0_time_ms`, `prover.rap_constraints.ple_round0_time_ms`, `prover.rap_constraints.mle_rounds_time_ms` - `prover.openings_time_ms` — opening proofs (parent) - `prover.openings.stacked_reduction_time_ms`, `prover.openings.whir_time_ms` - `prover.openings.stacked_reduction.round0_time_ms`, `prover.openings.stacked_reduction.mle_rounds_time_ms` - New GPU metrics: `fractional_sumcheck_gpu_time_ms`, `prove_zerocheck_and_logup_gpu_time_ms`, `compute_merkle_precomputation_cuda_time_ms` - New GPU memory: `gpu_mem.current_bytes`, `gpu_mem.local_peak_bytes`, `gpu_mem.reserved_bytes`, `gpu_mem.timestamp_ms` - New phases: `compression_time_ms`, `generate_cached_trace_time_ms`, `generate_proving_ctxs_time_ms`, `generate_blob_time_ms`, `set_initial_memory_time_ms` - `module` label: prover sub-module breakdown (e.g. 
`prover.merkle_tree`, `prover.openings`, `frac_sumcheck.*`, `tracegen.*`) - `air` label: used by `generate_cached_trace_time_ms` and `single_trace_gen_time_ms` (contains full Rust type name, distinct from `air_name`) - Removed: `dummy_proof_and_keygen_time_ms`, `generate_perm_trace_time_ms`, `perm_trace_commit_time_ms`, `quotient_poly_*_time_ms`, `pcs_opening_time_ms`, `memory_to_vec_partition_time_ms` ### Version Detection The viewer auto-detects the OpenVM version by checking for `logup_gkr` in metric names (V2-only). The detected version is displayed as a badge in the navbar. ### Proof Time Hierarchy In both V1 and V2, `execute_metered_time_ms` runs *before* segment proving and sits *outside* per-segment `total_proof_time_ms`. The viewer reports metered execution as a separate top-level phase and uses `sum(total_proof_time_ms)` for the app phase. **V1**: ``` total = metered + sum(app.total_proof_time_ms) + leaf.total_proof_time_ms + internal.total_proof_time_ms app.total_proof_time_ms ≈ sum_per_segment(preflight + trace_gen + stark_excl) + small overhead ``` **V2**: ``` total = metered + sum(app.total_proof_time_ms) + leaf.total_proof_time_ms + internal.total_proof_time_ms + compression.total_proof_time_ms app.total_proof_time_ms ≈ sum_per_segment(preflight + set_initial_memory + trace_gen + stark_excl) + small overhead stark_excl ≈ prover.main_trace_commit + prover.rap_constraints + prover.openings ``` The V2 stacked bar chart breaks STARK into three sub-components (constraints, openings, trace commit) plus a small "STARK other" residual. 
Generate a combined file with: ```bash python3 openvm-riscv/scripts/basic_metrics.py combine **/metrics.json > combined_metrics.json ``` Example input files: - OpenVM 1 — Keccak: https://github.com/powdr-labs/bench-results/blob/gh-pages/results/2026-03-23-0535/keccak/combined_metrics.json - OpenVM 1 — Reth (older format, no constraints/interactions): https://github.com/powdr-labs/bench-results/blob/gh-pages/results/2026-03-23-0535/reth/combined_metrics.json - OpenVM 2 — Pairing: https://gist.githubusercontent.com/leonardoalt/3074cb729c03470b1116674618b97267/raw/eec5e5a086bf07a57e2215843f0a3f1ada9d0d5c/metrics_v2_pairing_combined.json ## Testing Start server and open with example data: ```bash cd openvm/metrics-viewer && python3 -m http.server 8000 ``` Load data via file upload (drag-drop) or URL parameter: ``` http://localhost:8000/?data=&run= ``` For raw metrics JSON loaded from a URL, the viewer infers the experiment name from the path (for example `/apc030/metrics.json` becomes `apc030`). Example, using the data above and pre-selecting the `apc030` run: ``` http://localhost:8000/?data=https%3A%2F%2Fgithub.com%2Fpowdr-labs%2Fbench-results%2Fblob%2Fgh-pages%2Fresults%2F2026-03-19-0538%2Fkeccak%2Fcombined_metrics.json&baseline=apc000&run=apc030 ``` Verify: - Summary table shows key metrics for all runs - Stacked bar chart shows proof time breakdown; "By Component" tab shows grouped bars - Clicking a run shows experiment details (details table + trace cell pie chart) - URL updates with selected run and data source - Version badge in navbar shows "OpenVM 1" or "OpenVM 2" - For OpenVM 2: compression time appears in the breakdown, "App Cells (without padding)" row is hidden ## URL Parameters ``` ?data= # Data source (loads raw or combined metrics JSON; GitHub blob URLs auto-converted to raw) &run= # Pre-select a run by name ``` ## Code Structure The JavaScript in `index.html` is organized into clearly separated sections: 1. 
**Data Processing** — ports of Python logic, these are the core functions that compute all displayed numbers: - `normalizeMetricsData(json, sourceLabel)` — validates the incoming JSON shape, distinguishes raw-vs-combined input, and wraps raw files as a single experiment. - `detectOpenVmVersion(combinedData)` — returns `1` or `2` by checking for `logup_gkr` in metric names (V2-only). - `loadMetricsDataframes(json)` — port of [`metrics_utils.py:load_metrics_dataframes`](../../openvm-riscv/scripts/metrics_utils.py). Flattens `counter`+`gauge` arrays into entries, splits by `group` prefix into `app`, `leaf`, `internal`, `compression`. - `isNormalInstructionAir(name)` — port of [`metrics_utils.py:is_normal_instruction_air`](../../openvm-riscv/scripts/metrics_utils.py). Classifies AIR names as normal RISC-V instructions vs. precompiles. - `getMetric(entries, name)` — sums `value` for all entries matching a metric name. - `extractMetrics(runName, json)` — port of [`basic_metrics.py:extract_metrics`](../../openvm-riscv/scripts/basic_metrics.py). Computes all summary metrics (proof times, cell counts, ratios) from raw JSON. - `computeCellsByAir(json)` — port of [`plot_trace_cells.py:compute_cells_by_air`](../../openvm-riscv/scripts/plot_trace_cells.py). Aggregates cells by AIR name with 1.5% threshold. 2. **Metric Descriptions** — `METRIC_INFO` object (search for `const METRIC_INFO`). Single source of truth for human-readable descriptions and Python code snippets for every computed metric. Displayed as info-icon tooltips in the detail tables. When adding a new metric to the detail rows, add a corresponding entry here. 3. **Constants** — `COMPONENTS_V1`/`COMPONENTS_V2` (proof time breakdown components with colors), `TABLE_COLUMNS`, detail row arrays (`BASIC_STATS_ROWS_V1`/`V2`, `PROOF_TIME_ROWS_V1`/`V2`). Version-aware getters (`getComponents()`, `getBasicStatsRows()`, `getProofTimeRows()`) return the right variant. 4. 
**Chart Components** — `createBarChart()`, `createGroupedBarChart()`, `createPieChart()`, each rendering into their container. 5. **Table Components** — `createSummaryTable()`, `renderDetails()`. 6. **Data Loading & URL Handling** — file upload, URL fetch, parameter sync. ================================================ FILE: openvm/metrics-viewer/index.html ================================================ OpenVM Metrics Viewer

Drop metrics JSON here

or click to select a file

Supports combined metrics JSON and raw metrics JSON with counter + gauge.
Summary Table
Proof Time Breakdown
Experiment Details
Select an experiment to view details
================================================ FILE: openvm/metrics-viewer/spec.py ================================================ #!/usr/bin/env python3 """ Audit script for OpenVM metrics viewer. Recomputes the experiment details table from a metrics JSON file, printing all computed values so they can be verified against the web UI. Usage: python3 audit_metrics.py [experiment_name] The source can be a local file path or an HTTP(S) URL. GitHub blob URLs are auto-converted to raw URLs. If the input is a combined metrics file and no experiment is given, lists available experiments. """ from __future__ import annotations import json import re import sys import urllib.request from typing import Any, Callable, Literal # A single flattened metric entry: {"group": ..., "air_name": ..., "air_id": ..., "metric": ..., "value": ..., ...} Entry = dict[str, str] # Raw metrics JSON with "counter" and "gauge" arrays MetricsJson = dict[str, Any] # Computed metrics dict returned by extract_metrics Metrics = dict[str, Any] # ============================================================ # Computation — this is the code that needs to be audited. # It must match the JS in index.html exactly. 
# ============================================================

def load_metrics_dataframes(
    metrics_json: MetricsJson,
) -> tuple[list[Entry], list[Entry], list[Entry], list[Entry], list[Entry]]:
    """Port of loadMetricsDataframes: flatten entries, split by group prefix.

    Returns ``(all_entries, app, leaf, internal, compression)``. Each entry is
    a flat dict of the label pairs plus ``metric`` and ``value`` keys.
    """
    entries: list[Entry] = []
    for raw in metrics_json["counter"] + metrics_json["gauge"]:
        flat = dict(raw["labels"])
        flat["metric"] = raw["metric"]
        flat["value"] = raw["value"]
        entries.append(flat)

    def by_group_prefix(prefix: str) -> list[Entry]:
        return [e for e in entries if e.get("group", "").startswith(prefix)]

    # Older runs labelled the app phase "reth" instead of "app_proof";
    # fall back to that prefix when no "app_proof" entries exist.
    app = by_group_prefix("app_proof") or by_group_prefix("reth")
    leaf = by_group_prefix("leaf")
    internal = by_group_prefix("internal")
    # "compression" is matched exactly, not by prefix.
    compression = [e for e in entries if e.get("group", "") == "compression"]
    return entries, app, leaf, internal, compression


def is_normal_instruction_air(name: str) -> bool:
    """Port of isNormalInstructionAir.

    Classifies an AIR name as a "normal" RISC-V instruction AIR: a
    ``VmAirWrapper<Adapter, Core>`` whose core is not
    ``FieldExpressionCoreAir`` and whose first numeric generic argument
    (when present) equals 4.
    """
    m = re.match(r"^VmAirWrapper<[^,]+,\s*([^>]+?)(?:<(\d+)(?:,\s*\d+)*>)?\s*>$", name or "")
    if m is None:
        return False
    core, width = m.group(1), m.group(2)
    if core == "FieldExpressionCoreAir":
        return False
    return width is None or int(width) == 4


def sum_metric(entries: list[Entry], metric_name: str) -> float:
    """Sum values for entries matching metric_name (0.0 when none match)."""
    matching = (float(e["value"]) for e in entries if e["metric"] == metric_name)
    return sum(matching)


def unique_metric(entries: list[Entry], metric_name: str) -> float:
    """Get the value of a metric that must appear exactly once."""
    hits = [e for e in entries if e["metric"] == metric_name]
    assert len(hits) == 1, f"Expected exactly 1 entry for '{metric_name}', found {len(hits)}"
    return float(hits[0]["value"])


def detect_version(metrics_json: MetricsJson) -> Literal[1, 2]:
    """Returns 2 if any metric name contains 'logup_gkr' (V2-only), else 1."""
    for entry in metrics_json["counter"] + metrics_json["gauge"]:
        if "logup_gkr" in entry["metric"]:
            return 2
    return 1
extract_metrics(run_name: str, metrics_json: MetricsJson) -> Metrics:
    """Port of extractMetrics from index.html. Returns dict of all computed values.

    Computes every number shown in the viewer's detail tables: AIR
    classification ratios, trace dimensions, constraint/interaction counts,
    and the per-phase proof-time breakdown. Must stay in exact sync with the
    JS implementation (this function exists to audit it).
    """
    all_entries, app, leaf, internal, compression = load_metrics_dataframes(metrics_json)
    m: Metrics = {}
    m["name"] = run_name

    # --- Classify app AIRs ---
    # Powdr autoprecompile AIRs are identified by name prefix; the rest are
    # split into normal RISC-V instruction AIRs vs. OpenVM precompiles.
    powdr_air = [e for e in app if (e.get("air_name") or "").startswith("PowdrAir")]
    non_powdr = [e for e in app if not (e.get("air_name") or "").startswith("PowdrAir")]
    normal_air = [e for e in non_powdr if is_normal_instruction_air(e.get("air_name", ""))]
    precompile_air = [e for e in non_powdr if not is_normal_instruction_air(e.get("air_name", ""))]

    # --- Basic stats ---
    m["app_proof_cols"] = sum_metric(app, "main_cols") + sum_metric(app, "prep_cols") + sum_metric(app, "perm_cols")
    # Segments are 0-indexed, so max + 1 is the count; default=-1 yields 0
    # segments when no entry carries a "segment" label.
    segments = [int(e["segment"]) for e in app if "segment" in e]
    m["num_segments"] = max(segments, default=-1) + 1
    # One "rows" entry is emitted per (AIR, segment) pair.
    m["num_air_instances"] = len([e for e in app if e["metric"] == "rows"])
    m["app_proof_cells"] = sum_metric(app, "total_cells")
    m["app_proof_cells_used"] = sum_metric(app, "total_cells_used")  # V1 only

    # --- Constraints & bus interactions ---
    # Older V1 runs may lack these metrics entirely; report None in that case.
    has_constraints = any(e["metric"] == "constraints" for e in all_entries)
    has_interactions = any(e["metric"] == "interactions" for e in all_entries)
    # Rows & segments by AIR, summed over all segments.
    # We key by (air_id, air_name) because air_id alone is only unique within a proving
    # phase — different phases (app, leaf, compression) reuse the same air_id for
    # unrelated AIRs. Keying by the pair is a pragmatic fix: it would break if the same
    # (air_id, air_name) tuple appeared in two different phases, but that is unlikely
    # since each phase uses a distinct AIR set.
    segments_by_app_air: dict[str, float] = {}
    rows_by_app_air: dict[str, float] = {}
    for e in app:
        # Rows are indicated per segment and AIR
        if e["metric"] == "rows":
            key = f"{e['air_id']}:{e.get('air_name', '')}"
            segments_by_app_air[key] = segments_by_app_air.get(key, 0) + 1
            rows_by_app_air[key] = rows_by_app_air.get(key, 0) + float(e["value"])

    # Constraints and interactions are listed per AIR.
    # For the number of constraints and interactions, we weight by the number of segments for that AIR;
    # for the number of instances and messages, we weight by the number of rows (across all segments).
    def weighted_sum(metric_name: str, weights: dict[str, float]) -> float:
        # AIRs not present in the app phase get weight 0 and drop out.
        return sum(
            float(e["value"]) * weights.get(f"{e['air_id']}:{e.get('air_name', '')}", 0)
            for e in all_entries
            if e["metric"] == metric_name
        )

    m["constraints"] = weighted_sum("constraints", segments_by_app_air) if has_constraints else None
    m["bus_interactions"] = weighted_sum("interactions", segments_by_app_air) if has_interactions else None
    m["constraint_instances"] = weighted_sum("constraints", rows_by_app_air) if has_constraints else None
    m["bus_interaction_messages"] = weighted_sum("interactions", rows_by_app_air) if has_interactions else None

    # --- Proof times by phase ---
    # execute_metered runs *before* segment proving and is outside per-segment
    # total_proof_time_ms. We report it as a separate top-level phase.
    m["execute_metered_time_ms"] = sum_metric(app, "execute_metered_time_ms")
    m["app_proof_time_ms"] = sum_metric(app, "total_proof_time_ms")
    m["leaf_proof_time_ms"] = sum_metric(leaf, "total_proof_time_ms")
    m["inner_recursion_proof_time_ms"] = sum_metric(internal, "total_proof_time_ms")
    m["compression_proof_time_ms"] = sum_metric(compression, "total_proof_time_ms")  # V2 only; 0 on V1
    m["total_proof_time_ms"] = (m["execute_metered_time_ms"] + m["app_proof_time_ms"] + m["leaf_proof_time_ms"] + m["inner_recursion_proof_time_ms"] + m["compression_proof_time_ms"])

    # --- STARK time excluding trace ---
    m["app_proof_time_excluding_trace_ms"] = sum_metric(app, "stark_prove_excluding_trace_time_ms")

    # --- App time sub-components ---
    m["app_execute_preflight_time_ms"] = sum_metric(app, "execute_preflight_time_ms")
    m["app_trace_gen_time_ms"] = sum_metric(app, "trace_gen_time_ms")
    m["app_set_initial_memory_time_ms"] = sum_metric(app, "set_initial_memory_time_ms")  # V2 only

    # --- V2: STARK sub-components (prover.*) ---
    # These metrics do not exist in V1 files, so the sums are 0 there.
    m["app_trace_commit_time_ms"] = sum_metric(app, "prover.main_trace_commit_time_ms")
    m["app_rap_constraints_time_ms"] = sum_metric(app, "prover.rap_constraints_time_ms")
    m["app_openings_time_ms"] = sum_metric(app, "prover.openings_time_ms")
    # Residual: whatever part of STARK time the three sub-metrics don't cover.
    m["app_stark_other_ms"] = (m["app_proof_time_excluding_trace_ms"] - m["app_trace_commit_time_ms"] - m["app_rap_constraints_time_ms"] - m["app_openings_time_ms"])

    # --- V2: rap_constraints sub-components ---
    m["app_rap_logup_gkr_time_ms"] = sum_metric(app, "prover.rap_constraints.logup_gkr_time_ms")
    m["app_rap_round0_time_ms"] = sum_metric(app, "prover.rap_constraints.round0_time_ms")
    m["app_rap_mle_rounds_time_ms"] = sum_metric(app, "prover.rap_constraints.mle_rounds_time_ms")
    m["app_rap_other_ms"] = (m["app_rap_constraints_time_ms"] - m["app_rap_logup_gkr_time_ms"] - m["app_rap_round0_time_ms"] - m["app_rap_mle_rounds_time_ms"])

    # --- V2: openings sub-components ---
    m["app_openings_whir_time_ms"] = sum_metric(app, "prover.openings.whir_time_ms")
    m["app_openings_stacked_reduction_time_ms"] = sum_metric(app, "prover.openings.stacked_reduction_time_ms")
    m["app_openings_other_ms"] = (m["app_openings_time_ms"] - m["app_openings_whir_time_ms"] - m["app_openings_stacked_reduction_time_ms"])

    # --- App other (residual) ---
    # execute_metered is a separate top-level phase, not inside app_proof_time_ms.
    m["app_other_ms"] = (m["app_proof_time_ms"] - m["app_proof_time_excluding_trace_ms"] - m["app_execute_preflight_time_ms"] - m["app_trace_gen_time_ms"] - m["app_set_initial_memory_time_ms"])

    # --- Cell ratios ---
    # Ratios of the padded cell count; guard against division by zero.
    total = m["app_proof_cells"]
    m["powdr_ratio"] = sum_metric(powdr_air, "cells") / total if total > 0 else 0
    m["normal_instruction_ratio"] = sum_metric(normal_air, "cells") / total if total > 0 else 0
    m["openvm_precompile_ratio"] = sum_metric(precompile_air, "cells") / total if total > 0 else 0
    return m

# ============================================================
# Presentation — formatting and printing (not part of audit)
# ============================================================

# Formatter turns a raw metric value into its display string.
Formatter = Callable[[float], str]
# Basic stats row: (key, label, formatter)
BasicRow = tuple[str, str, Formatter]
# Proof time row: (key, label, indent, flags) — flags: b=bold, r=residual
ProofRow = tuple[str, str, int, str]
# Union of row types used by print_section
Row = BasicRow | ProofRow

def fmt_ms(ms: float) -> str:
    # e.g. 45678 -> "45.68s (45678 ms)"
    return f"{ms / 1000:.2f}s ({ms:.0f} ms)"

def fmt_cells(v: float) -> str:
    # Human-scaled suffix plus the exact comma-grouped value, e.g.
    # 1234567 -> "1.23M (1,234,567)"; values below 1e3 are printed plain.
    for threshold, suffix in [(1e9, "B"), (1e6, "M"), (1e3, "K")]:
        if v >= threshold:
            return f"{v / threshold:.2f}{suffix} ({v:,.0f})"
    return f"{v:,.0f}"

def fmt_int(v: float) -> str:
    return f"{v:,.0f}"

def fmt_pct(v: float) -> str:
    return f"{v * 100:.1f}%"

# Rows of the "App Proof Basic Stats" section (V1 file layout).
BASIC_STATS_V1: list[BasicRow] = [
    ("num_segments", "Segments", lambda v: str(int(v))),
    ("num_air_instances", "AIR Instances", fmt_int),
    ("app_proof_cols", "Columns", fmt_int),
    ("app_proof_cells", "Cells", fmt_cells),
    ("app_proof_cells_used", "Cells (without padding)", fmt_cells),
    ("constraints", "Constraints", fmt_int),
    ("constraint_instances", "Constraint Instances", fmt_cells),
    ("bus_interactions", "Bus Interactions", fmt_int),
    ("bus_interaction_messages", "Bus Interaction Messages", fmt_cells),
]

# V2 files have no total_cells_used metric, so drop that row.
BASIC_STATS_V2: list[BasicRow] = [r for r in BASIC_STATS_V1 if r[0] != "app_proof_cells_used"]

# NOTE(review): the leading spaces inside the label strings below (and the
# literal spacing in print_section's f-strings) may have been wider in the
# original source — repeated whitespace appears collapsed by extraction.
# Confirm against the canonical file before relying on exact alignment.
PROOF_TIME_V1: list[ProofRow] = [
    ("execute_metered_time_ms", "Metered Execution", 0, ""),
    ("app_proof_time_ms", "App Proof Time", 0, ""),
    ("app_proof_time_excluding_trace_ms", " STARK (excl. trace)", 1, ""),
    ("app_execute_preflight_time_ms", " Preflight Execution", 1, ""),
    ("app_trace_gen_time_ms", " Trace Gen", 1, ""),
    ("app_other_ms", " Other / Overlap", 1, "r"),
    ("leaf_proof_time_ms", "Leaf Recursion", 0, ""),
    ("inner_recursion_proof_time_ms", "Inner Recursion", 0, ""),
    ("total_proof_time_ms", "Total", 0, ""),
]

PROOF_TIME_V2: list[ProofRow] = [
    ("execute_metered_time_ms", "Metered Execution", 0, ""),
    ("app_proof_time_ms", "App Proof Time", 0, ""),
    ("app_proof_time_excluding_trace_ms", " STARK (excl. trace)", 1, ""),
    ("app_rap_constraints_time_ms", " Constraints", 2, ""),
    ("app_rap_logup_gkr_time_ms", " LogUp GKR", 3, ""),
    ("app_rap_round0_time_ms", " Round 0", 3, ""),
    ("app_rap_mle_rounds_time_ms", " MLE Rounds", 3, ""),
    ("app_rap_other_ms", " Other", 3, "r"),
    ("app_openings_time_ms", " Openings", 2, ""),
    ("app_openings_whir_time_ms", " WHIR", 3, ""),
    ("app_openings_stacked_reduction_time_ms", " Stacked Reduction", 3, ""),
    ("app_openings_other_ms", " Other", 3, "r"),
    ("app_trace_commit_time_ms", " Trace Commit", 2, ""),
    ("app_stark_other_ms", " Other", 2, "r"),
    ("app_execute_preflight_time_ms", " Preflight Execution", 1, ""),
    ("app_set_initial_memory_time_ms", " Set Initial Memory", 1, ""),
    ("app_trace_gen_time_ms", " Trace Gen", 1, ""),
    ("app_other_ms", " Other", 1, "r"),
    ("leaf_proof_time_ms", "Leaf Recursion", 0, ""),
    ("inner_recursion_proof_time_ms", "Inner Recursion", 0, ""),
    ("compression_proof_time_ms", "Compression", 0, ""),
    ("total_proof_time_ms", "Total", 0, ""),
]

# Rows of the "Trace Cell Distribution" section (ratios of app_proof_cells).
CELL_DISTRIBUTION: list[BasicRow] = [
    ("powdr_ratio", "Powdr", fmt_pct),
    ("normal_instruction_ratio", "Normal Instructions", fmt_pct),
    ("openvm_precompile_ratio", "OpenVM Precompiles", fmt_pct),
]

def print_section(
    title: str, rows: list[Row], m: Metrics, *, pct_of_key: str | None = None
) -> None:
    """Print one titled table section of the report.

    rows may mix BasicRow (3-tuple with formatter) and ProofRow (4-tuple,
    ms-formatted). When pct_of_key is given, each value is also shown as a
    percentage of m[pct_of_key]. Missing metrics print "N/A"; a rule is
    printed before the "total_proof_time_ms" row.
    """
    print(f"\n {title}")
    print(f" {'─' * 58}")
    width = max(len(r[1]) for r in rows)
    total = m.get(pct_of_key, 0) if pct_of_key else 0
    for row in rows:
        key, label = row[0], row[1]
        val: float | None = m.get(key)
        if key == "total_proof_time_ms":
            # Separator rule before the grand total.
            print(f" {'─' * 58}")
        if val is None:
            # Metric absent from this file (e.g. old V1 without constraints).
            print(f" {label:<{width}} N/A")
            continue
        # Determine formatter and flags
        if len(row) == 3:
            fmt: Formatter = row[2]  # type: ignore[assignment]
            flags = ""
        else:
            # ProofRow: always millisecond-formatted; row[3] carries flags.
            fmt = fmt_ms
            flags: str = row[3]  # type: ignore[no-redef]
        suffix = " (residual)" if "r" in flags else ""
        pct = f" ({val / total * 100:5.1f}%)" if total > 0 else ""
        print(f" {label:<{width}} {fmt(val)}{pct}{suffix}")
# ============================================================
# Data loading (IO)
# ============================================================

def load_data(source: str) -> tuple[dict[str, Any], str]:
    """Load JSON from a file path or URL. Returns (data, source_label).

    GitHub "blob" URLs are rewritten to raw.githubusercontent.com so the
    JSON body (not the HTML page) is fetched. The label is the final path
    component of the source, used to name raw-metrics experiments.
    """
    if source.startswith("http://") or source.startswith("https://"):
        url = re.sub(r"github\.com/(.+)/blob/", r"raw.githubusercontent.com/\1/", source)
        with urllib.request.urlopen(url) as resp:
            return json.loads(resp.read()), url.split("/")[-1]
    else:
        with open(source) as f:
            return json.load(f), source.split("/")[-1]

def resolve_experiments(
    data: dict[str, Any], source_label: str, experiment: str | None
) -> dict[str, MetricsJson]:
    """Normalize raw/combined input and select experiment(s). Returns dict of {name: json}."""
    # A raw metrics file has top-level counter/gauge keys; treat it as a
    # single experiment named after the file (or the explicit argument).
    if "counter" in data and "gauge" in data:
        name = experiment or source_label.replace(".json", "")
        return {name: data}
    if experiment:
        if experiment not in data:
            sys.exit(f"Error: '{experiment}' not found. Available: {', '.join(sorted(data))}")
        return {experiment: data[experiment]}
    # A combined file with exactly one experiment needs no selection.
    if len(data) == 1:
        return data
    # Multiple experiments and none selected: list them and exit cleanly.
    print(f"Combined file with {len(data)} experiments:")
    for name in sorted(data):
        print(f" - {name}")
    # NOTE(review): this usage line appears to end with a trailing space where
    # a "<experiment>" placeholder likely stood in the original source (angle
    # brackets seem stripped by extraction) — confirm against the canonical file.
    print(f"\nUsage: {sys.argv[0]} {sys.argv[1]} ")
    sys.exit(0)

# ============================================================
# Main
# ============================================================

def main() -> None:
    # No source argument: print the module docstring as usage help.
    if len(sys.argv) < 2:
        print(__doc__.strip())
        sys.exit(1)
    data, source_label = load_data(sys.argv[1])
    experiment = sys.argv[2] if len(sys.argv) > 2 else None
    runs = resolve_experiments(data, source_label, experiment)
    for run_name, metrics_json in runs.items():
        version = detect_version(metrics_json)
        m = extract_metrics(run_name, metrics_json)
        print(f"\nExperiment: {run_name} (OpenVM {version})")
        # Pick the version-specific row layouts for the report sections.
        basic = BASIC_STATS_V2 if version == 2 else BASIC_STATS_V1
        proof = PROOF_TIME_V2 if version == 2 else PROOF_TIME_V1
        print_section("App Proof Basic Stats", basic, m)
print_section("Proof Time", proof, m, pct_of_key="total_proof_time_ms") print_section("Trace Cell Distribution", CELL_DISTRIBUTION, m) if __name__ == "__main__": main() ================================================ FILE: openvm/src/air_builder.rs ================================================ use std::sync::Arc; use openvm_stark_backend::air_builders::symbolic::get_symbolic_builder; use openvm_stark_backend::air_builders::symbolic::SymbolicRapBuilder; use openvm_stark_backend::config::Com; use openvm_stark_backend::config::StarkGenericConfig; use openvm_stark_backend::config::Val; use openvm_stark_backend::interaction::RapPhaseSeqKind; use openvm_stark_backend::keygen::types::ProverOnlySinglePreprocessedData; use openvm_stark_backend::keygen::types::TraceWidth; use openvm_stark_backend::keygen::types::VerifierSinglePreprocessedData; use openvm_stark_backend::p3_commit::Pcs; use openvm_stark_backend::p3_matrix::Matrix; use openvm_stark_backend::rap::AnyRap; pub struct PrepKeygenData { pub _verifier_data: Option>>, pub prover_data: Option>, } pub struct AirKeygenBuilder { air: Arc>, prep_keygen_data: PrepKeygenData, } fn compute_prep_data_for_air( pcs: &SC::Pcs, air: &dyn AnyRap, ) -> PrepKeygenData { let preprocessed_trace = air.preprocessed_trace(); let vpdata_opt = preprocessed_trace.map(|trace| { let domain = pcs.natural_domain_for_degree(trace.height()); let (commit, data) = pcs.commit(vec![(domain, trace.clone())]); let vdata = VerifierSinglePreprocessedData { commit }; let pdata = ProverOnlySinglePreprocessedData { trace: Arc::new(trace), data: Arc::new(data), }; (vdata, pdata) }); if let Some((vdata, pdata)) = vpdata_opt { PrepKeygenData { prover_data: Some(pdata), _verifier_data: Some(vdata), } } else { PrepKeygenData { prover_data: None, _verifier_data: None, } } } impl AirKeygenBuilder { pub fn new(pcs: &SC::Pcs, air: Arc>) -> Self { let prep_keygen_data = compute_prep_data_for_air(pcs, air.as_ref()); AirKeygenBuilder { air, prep_keygen_data, } } pub 
fn get_symbolic_builder( &self, max_constraint_degree: Option, ) -> SymbolicRapBuilder> { let width = TraceWidth { preprocessed: self.prep_keygen_data.width(), cached_mains: self.air.cached_main_widths(), common_main: self.air.common_main_width(), after_challenge: vec![], }; get_symbolic_builder( self.air.as_ref(), &width, &[], &[], RapPhaseSeqKind::None, max_constraint_degree.unwrap_or(0), ) } } impl PrepKeygenData { pub fn width(&self) -> Option { self.prover_data.as_ref().map(|d| d.trace.width()) } } ================================================ FILE: openvm/src/cuda_abi.rs ================================================ #![cfg(feature = "cuda")] use openvm_cuda_backend::base::DeviceMatrix; use openvm_cuda_common::{d_buffer::DeviceBuffer, error::CudaError}; use openvm_stark_backend::prover::hal::MatrixDimensions; use openvm_stark_sdk::p3_baby_bear::BabyBear; extern "C" { /// Launches the GPU kernel that maps original AIR traces into the APC trace buffer. /// /// Safety: All pointers must be valid device pointers for the specified lengths. pub fn _apc_tracegen( d_output: *mut BabyBear, // column-major output_height: usize, // H_out d_original_airs: *const OriginalAir, // device array of AIR metadata d_subs: *const Subst, // device array of all substitutions n_subs: usize, // number of substitutions num_apc_calls: i32, // number of APC calls ) -> i32; /// Applies derived expression columns on the GPU. /// Each thread processes rows; for rows >= num_apc_calls, writes zeros. /// Safety: All device pointers must be valid for the specified lengths. 
pub fn _apc_apply_derived_expr( d_output: *mut BabyBear, // APC trace matrix (column-major) output_height: usize, // rows (height) num_apc_calls: i32, // number of valid rows d_specs: *const DerivedExprSpec, // device array of derived expression specs n_cols: usize, // number of derived columns d_bytecode: *const u32, // device bytecode buffer ) -> i32; /// Launches the GPU kernel that applies bus interactions to periphery histograms. /// /// Safety: All pointers must be valid device pointers for the specified lengths. pub fn _apc_apply_bus( // APC related d_output: *const BabyBear, // APC trace buffer (column-major), device pointer num_apc_calls: i32, // number of APC calls (rows to process) // Interaction related d_bytecode: *const u32, // device bytecode buffer for stack-machine expressions bytecode_len: usize, // length of bytecode buffer (u32 words) d_interactions: *const DevInteraction, // device array of interactions n_interactions: usize, // number of interactions d_arg_spans: *const ExprSpan, // device array of arg spans into `d_bytecode` n_arg_spans: usize, // number of arg spans // Variable range checker related var_range_bus_id: u32, // bus id for the variable range checker d_var_hist: *mut u32, // device histogram for variable range checker var_num_bins: usize, // number of bins in variable range histogram // Tuple range checker related tuple2_bus_id: u32, // bus id for the 2-tuple range checker d_tuple2_hist: *mut u32, // device histogram for tuple2 checker tuple2_sz0: u32, // tuple2 dimension 0 size tuple2_sz1: u32, // tuple2 dimension 1 size // Bitwise related bitwise_bus_id: u32, // bus id for the bitwise lookup d_bitwise_hist: *mut u32, // device histogram for bitwise lookup ) -> i32; } #[repr(C)] #[derive(Clone, Copy, Debug)] pub struct OriginalAir { pub width: i32, // number of columns pub height: i32, // number of rows (Ha) pub buffer: *const BabyBear, // column-major base: col*height + row (device ptr) pub row_block_size: i32, // stride 
// between used rows (continuation of the `row_block_size` field comment above)
}

/// A single cell-copy substitution: maps one cell of an original AIR trace
/// into a column of the APC trace. Mirrors the device-side struct layout.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct Subst {
    /// Index of the source AIR in `d_original_airs`
    pub air_index: i32,
    /// Source column within this AIR
    pub col: i32,
    /// Base row offset within the row-block
    pub row: i32,
    /// Destination APC column
    pub apc_col: i32,
}

#[repr(C)]
#[derive(Clone, Copy)]
pub struct DerivedExprSpec {
    /// Precomputed destination APC column base = (apc_col_index * H)
    pub col_base: u64,
    /// Expression span inside the shared bytecode buffer
    pub span: ExprSpan,
}

/// High-level wrapper for `_apc_tracegen`: fills the APC trace matrix on the
/// GPU from the original AIR traces via the substitution table.
// NOTE(review): the generic parameters of `DeviceMatrix`/`DeviceBuffer` below
// appear stripped by extraction (likely `DeviceMatrix<BabyBear>`,
// `DeviceBuffer<OriginalAir>`, `DeviceBuffer<Subst>`) — confirm against the
// canonical source before editing signatures.
pub fn apc_tracegen(
    output: &mut DeviceMatrix,      // column-major
    original_airs: DeviceBuffer,    // device array of AIR metadata
    substitutions: DeviceBuffer,    // device array of all substitutions
    num_apc_calls: usize,
) -> Result<(), CudaError> {
    let output_height = output.height();
    // SAFETY: all pointers come from live device buffers whose lengths are
    // passed alongside them, per the extern declaration's contract.
    unsafe {
        CudaError::from_result(_apc_tracegen(
            output.buffer().as_mut_ptr(),
            output_height,
            original_airs.as_ptr(),
            substitutions.as_ptr(),
            substitutions.len(),
            num_apc_calls as i32,
        ))
    }
}

/// High-level wrapper for `_apc_apply_derived_expr`.
/// Applies derived arbitrary expression columns using the GPU stack machine.
pub fn apc_apply_derived_expr(
    output: &mut DeviceMatrix,
    specs: DeviceBuffer,
    bytecode: DeviceBuffer,
    num_apc_calls: usize,
) -> Result<(), CudaError> {
    // SAFETY: pointers and lengths come from the same live device buffers.
    unsafe {
        CudaError::from_result(_apc_apply_derived_expr(
            output.buffer().as_mut_ptr(),
            output.height(),
            num_apc_calls as i32,
            specs.as_ptr(),
            specs.len(),
            bytecode.as_ptr(),
        ))
    }
}

/// OpCode enum for the GPU stack machine bus evaluator.
/// Discriminant values must match the device-side interpreter.
#[repr(u32)]
pub enum OpCode {
    PushApc = 0,   // Push the APC value onto the stack. Must be followed by the index of the value in the APC device buffer.
    PushConst = 1, // Push a constant value onto the stack. Must be followed by the constant value.
    Add = 2,       // Add the top two values on the stack.
    Sub = 3,       // Subtract the top two values on the stack.
    Mul = 4,       // Multiply the top two values on the stack.
    Neg = 5,       // Negate the top value on the stack.
    InvOrZero = 6, // Invert the top value on the stack if it is not zero, otherwise pop and push zero.
}

/// GPU device representation of a bus interaction.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct DevInteraction {
    /// Bus id this interaction targets (matches periphery chip bus id)
    pub bus_id: u32,
    /// Number of argument expressions for this interaction
    pub num_args: u32,
    /// Starting index into the `ExprSpan` array for this interaction's args
    /// Layout: [ multiplicity span, arg0, arg1, ... ]
    pub args_index_off: u32,
}

/// A (offset, length) slice descriptor into the shared stack-machine
/// bytecode buffer; one per expression argument.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ExprSpan {
    /// Offset (in u32 words) into `bytecode` where this arg expression starts
    pub off: u32,
    /// Length (instruction count) of this arg expression
    pub len: u32,
}

/// High-level safe wrapper for `_apc_apply_bus`. Applies bus interactions on the GPU,
/// updating periphery histograms in-place.
// NOTE(review): generic parameters of `DeviceMatrix`/`DeviceBuffer` below
// appear stripped by extraction (likely `DeviceMatrix<BabyBear>`,
// `DeviceBuffer<u32>`, `DeviceBuffer<DevInteraction>`, `DeviceBuffer<ExprSpan>`)
// — confirm against the canonical source before editing signatures.
#[allow(clippy::too_many_arguments)]
pub fn apc_apply_bus(
    // APC related
    output: &DeviceMatrix,           // APC trace matrix (column-major) on device
    num_apc_calls: usize,            // number of APC calls (rows to process)
    // Interaction related
    bytecode: DeviceBuffer,          // device bytecode buffer
    interactions: DeviceBuffer,      // device array of interactions
    arg_spans: DeviceBuffer,         // device array of arg spans
    // Variable range checker related
    var_range_bus_id: u32,           // bus id for variable range checker
    var_range_count: &DeviceBuffer,  // device histogram for variable range
    // Tuple range checker related
    tuple2_bus_id: u32,              // bus id for tuple range checker (2-ary)
    tuple2_count: &DeviceBuffer,     // device histogram for tuple2
    tuple2_sizes: [u32; 2],          // tuple2 sizes (dim0, dim1)
    // Bitwise related
    bitwise_bus_id: u32,             // bus id for bitwise lookup
    bitwise_count: &DeviceBuffer,    // device histogram for bitwise lookup
) -> Result<(), CudaError> {
    // SAFETY: every pointer/length pair originates from a live device buffer;
    // the histogram buffers are written in-place by the kernel.
    unsafe {
        CudaError::from_result(_apc_apply_bus(
            // APC related
            output.buffer().as_ptr(),
            num_apc_calls as i32,
            // Interaction related
            bytecode.as_ptr(),
            bytecode.len(),
            interactions.as_ptr(),
interactions.len(), arg_spans.as_ptr(), arg_spans.len(), // Variable range checker related var_range_bus_id, var_range_count.as_mut_ptr() as *mut u32, var_range_count.len(), // Tuple range checker related tuple2_bus_id, tuple2_count.as_mut_ptr() as *mut u32, tuple2_sizes[0], tuple2_sizes[1], // Bitwise related bitwise_bus_id, bitwise_count.as_mut_ptr() as *mut u32, )) } } ================================================ FILE: openvm/src/customize_exe.rs ================================================ use std::fmt::Display; use std::hash::Hash; use std::iter::once; use std::marker::PhantomData; use std::sync::Arc; use crate::extraction_utils::{get_air_metrics, AirWidthsDiff, OriginalAirs}; use crate::isa::OpenVmISA; use crate::powdr_extension::chip::PowdrAir; use crate::program::Prog; use crate::OriginalCompiledProgram; use crate::{CompiledProgram, SpecializedConfig}; use itertools::Itertools; use openvm_circuit::arch::VmState; use openvm_circuit::system::memory::online::GuestMemory; use openvm_instructions::instruction::Instruction as OpenVmInstruction; use openvm_instructions::program::DEFAULT_PC_STEP; use openvm_instructions::VmOpcode; use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32}; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::adapter::{ Adapter, AdapterApc, AdapterApcWithStats, ApcWithStats, PgoAdapter, }; use powdr_autoprecompiles::blocks::{Instruction, PcStep}; use powdr_autoprecompiles::empirical_constraints::EmpiricalConstraints; use powdr_autoprecompiles::execution::ExecutionState; use powdr_autoprecompiles::pgo::ApcCandidate; use powdr_autoprecompiles::PowdrConfig; use powdr_autoprecompiles::{InstructionHandler, VmConfig}; use powdr_number::{BabyBearField, FieldElement, LargeInt}; use powdr_openvm_bus_interaction_handler::bus_map::OpenVmBusType; use serde::{Deserialize, Serialize}; use crate::powdr_extension::{PowdrOpcode, PowdrPrecompile}; pub use powdr_openvm_bus_interaction_handler::{ 
    memory_bus_interaction::OpenVmMemoryBusInteraction, OpenVmBusInteractionHandler,
};
/// Base opcode for Powdr autoprecompile instructions; the i-th generated APC
/// uses opcode `POWDR_OPCODE + i` (see `customize`).
pub const POWDR_OPCODE: usize = 0x10ff;
/// An adapter for the BabyBear OpenVM precompiles.
/// Note: This could be made generic over the field, but the implementation of `Candidate` is BabyBear-specific.
/// The lifetime parameter is used because we use a reference to the `OpenVmProgram` in the `Prog` type.
pub struct BabyBearOpenVmApcAdapter<'a, ISA> {
    _marker: std::marker::PhantomData<&'a ISA>,
}
/// The openvm execution state, used for execution constraint checking
pub struct OpenVmExecutionState<'a, F, ISA> {
    // Borrowed OpenVM state; only `pc()` is forwarded below.
    inner: &'a VmState,
    _marker: PhantomData,
}
impl<'a, F: PrimeField32, ISA> From<&'a VmState> for OpenVmExecutionState<'a, F, ISA> {
    fn from(inner: &'a VmState) -> Self {
        Self {
            inner,
            _marker: PhantomData,
        }
    }
}
// TODO: This is not tested yet as apc compilation does not currently output any optimistic constraints
// Only `pc` is implemented; the remaining accessors panic because optimistic
// constraints are not emitted by APC compilation yet (see TODO above).
impl<'a, F: PrimeField32, ISA: OpenVmISA> ExecutionState for OpenVmExecutionState<'a, F, ISA> {
    type RegisterAddress = ();
    type Value = u32;
    fn pc(&self) -> Self::Value {
        self.inner.pc()
    }
    fn reg(&self, _addr: &Self::RegisterAddress) -> Self::Value {
        unimplemented!("optimistic constraints are currently unused")
    }
    fn value_limb(_value: Self::Value, _limb_index: usize) -> Self::Value {
        unimplemented!("optimistic constraints are currently unused")
    }
    fn global_clk(&self) -> usize {
        unimplemented!("optimistic constraints are currently unused")
    }
}
impl<'a, ISA: OpenVmISA> Adapter for BabyBearOpenVmApcAdapter<'a, ISA> {
    type PowdrField = BabyBearField;
    type Field = BabyBear;
    type InstructionHandler = OriginalAirs;
    type BusInteractionHandler = OpenVmBusInteractionHandler;
    type Program = Prog<'a, Self::Field>;
    type Instruction = Instr;
    type MemoryBusInteraction = OpenVmMemoryBusInteraction;
    type CustomBusTypes = OpenVmBusType;
    type ApcStats = OvmApcStats;
    type AirId = String;
    type ExecutionState = OpenVmExecutionState<'a, BabyBear, ISA>;
    /// Converts a powdr BabyBear element into the OpenVM BabyBear element via
    /// the canonical u32 representation.
    fn into_field(e: Self::PowdrField) -> Self::Field {
        openvm_stark_sdk::p3_baby_bear::BabyBear::from_canonical_u32(
            e.to_integer().try_into_u32().unwrap(),
        )
    }
    /// Inverse of `into_field`: OpenVM BabyBear -> powdr BabyBear, again via
    /// the canonical u32 representation.
    fn from_field(e: Self::Field) -> Self::PowdrField {
        BabyBearField::from(e.as_canonical_u32())
    }
    /// Computes before/after width statistics for an APC by comparing the
    /// APC's own AIR metrics against the summed metrics of the instruction
    /// chips it replaces.
    fn apc_stats(
        apc: Arc>,
        instruction_handler: &Self::InstructionHandler,
    ) -> Self::ApcStats {
        // Get the metrics for the apc using the same degree bound as the one used for the instruction chips
        let apc_metrics = get_air_metrics(
            Arc::new(PowdrAir::new(apc.machine.clone())),
            instruction_handler.degree_bound().identities,
        );
        let width_after = apc_metrics.widths;
        // Sum up the metrics for each instruction
        let width_before = apc
            .instructions()
            .map(|instr| {
                instruction_handler
                    .get_instruction_metrics(instr.inner.opcode)
                    .unwrap()
                    .widths
            })
            .sum();
        OvmApcStats::new(AirWidthsDiff::new(width_before, width_after))
    }
    fn is_allowed(instruction: &Self::Instruction) -> bool {
        ISA::allowed_opcodes().contains(&instruction.inner.opcode)
    }
    fn is_branching(instruction: &Self::Instruction) -> bool {
        ISA::branching_opcodes().contains(&instruction.inner.opcode)
    }
}
/// An OpenVM instruction paired with a zero-sized marker tying it to the ISA
/// type parameter.
#[derive(Serialize, Deserialize)]
pub struct Instr {
    pub inner: OpenVmInstruction,
    _marker: PhantomData,
}
impl From> for Instr {
    fn from(value: OpenVmInstruction) -> Self {
        Self {
            inner: value,
            _marker: PhantomData,
        }
    }
}
// TODO: derive, probably the compiler being too conservative here
impl Clone for Instr
where
    OpenVmInstruction: Clone,
{
    fn clone(&self) -> Self {
        Self {
            inner: self.inner.clone(),
            _marker: PhantomData,
        }
    }
}
impl Display for Instr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", ISA::format(&self.inner))
    }
}
impl PcStep for Instr {
    fn pc_step() -> u32 {
        DEFAULT_PC_STEP
    }
}
impl Instruction for Instr {
    fn pc_lookup_row(&self, pc: u64) -> Vec {
        let args = [
            self.inner.opcode.to_field(),
            self.inner.a,
            self.inner.b,
            self.inner.c,
            self.inner.d,
            self.inner.e,
            self.inner.f,
            self.inner.g,
        ];
        // The PC lookup row has the format:
        // [pc,
        // opcode, a, b, c, d, e, f, g]
        let pc = F::from_canonical_u32(pc.try_into().unwrap());
        once(pc).chain(args).collect()
    }
}
/// Generates autoprecompiles for a compiled program: collects its basic
/// blocks, lets the PGO adapter select and compile blocks into APCs, patches
/// the program so that witgen executes each APC instruction (opcode
/// `POWDR_OPCODE + i`) at the block's start PC, and returns the specialized
/// program and VM config.
pub fn customize<'a, ISA: OpenVmISA, P: PgoAdapter>>(
    original_program: OriginalCompiledProgram,
    config: PowdrConfig,
    pgo: P,
    empirical_constraints: EmpiricalConstraints,
) -> CompiledProgram {
    let original_config = original_program.vm_config.clone();
    let airs = original_config.airs(config.degree_bound).expect("Failed to convert the AIR of an OpenVM instruction, even after filtering by the blacklist!");
    let bus_map = original_config.bus_map();
    let vm_config = VmConfig {
        instruction_handler: &airs,
        bus_interaction_handler: OpenVmBusInteractionHandler::new(bus_map.clone()),
        bus_map: bus_map.clone(),
    };
    let symbols = ISA::get_symbol_table(&original_program.linked_program);
    let blocks = original_program.collect_basic_blocks();
    tracing::info!(
        "Got {} basic blocks from `collect_basic_blocks`",
        blocks.len()
    );
    if tracing::enabled!(tracing::Level::DEBUG) {
        tracing::debug!("Basic blocks sorted by execution count (top 10):");
        // Blocks without an execution count in the PGO data are skipped by the
        // `filter_map`.
        for (count, block) in blocks
            .iter()
            .filter_map(|block| Some((pgo.pc_execution_count(block.start_pc)?, block)))
            .sorted_by_key(|(count, _)| *count)
            .rev()
            .take(10)
        {
            let name = symbols
                .try_get_one_or_preceding(block.start_pc)
                .map(|(symbol, offset)| format!("{} + {offset}", symbol))
                .unwrap_or_default();
            tracing::debug!("Basic block (executed {count} times), {name}:\n{block}",);
        }
    }
    let symbols = symbols
        .into_table()
        .into_iter()
        .map(|(key, values)| (key.into(), values))
        .collect();
    let exe = original_program.exe;
    let start = std::time::Instant::now();
    let apcs = pgo.filter_blocks_and_create_apcs_with_pgo(
        blocks,
        &config,
        vm_config,
        symbols,
        empirical_constraints.apply_pc_threshold(),
    );
    metrics::gauge!("total_apc_gen_time_ms").set(start.elapsed().as_millis() as f64);
    let pc_base = exe.program.pc_base;
    let pc_step = DEFAULT_PC_STEP;
    // We need to clone the program because we need to modify it to add the apc instructions.
    let mut exe = (*exe).clone();
    let program = &mut exe.program;
    tracing::info!("Adjust the program with the autoprecompiles");
    let extensions = apcs
        .into_iter()
        .map(ApcWithStats::into_parts)
        .enumerate()
        .map(|(i, (apc, apc_stats, _))| {
            let opcode = POWDR_OPCODE + i;
            let start_pc = apc
                .block
                .try_as_basic_block()
                .expect("Superblocks not yet supported in OpenVM")
                .start_pc;
            // Translate the block's start PC into a program index.
            let start_index = ((start_pc - pc_base as u64) / pc_step as u64)
                .try_into()
                .unwrap();
            // We encode in the program that the prover should execute the apc instruction instead of the original software version.
            // This is only for witgen: the program in the program chip is left unchanged.
            program.add_apc_instruction_at_pc_index(start_index, VmOpcode::from_usize(opcode));
            PowdrPrecompile::new(
                format!("PowdrAutoprecompile_{}", start_pc),
                PowdrOpcode {
                    class_offset: opcode,
                },
                apc,
                apc_stats,
            )
        })
        .collect();
    CompiledProgram {
        exe: Arc::new(exe),
        vm_config: SpecializedConfig::new(original_config, extensions, config.degree_bound),
    }
}
/// Per-APC statistics: the AIR widths before vs. after autoprecompilation.
#[derive(Clone, Serialize, Deserialize)]
pub struct OvmApcStats {
    pub widths: AirWidthsDiff,
}
impl OvmApcStats {
    pub fn new(widths: AirWidthsDiff) -> Self {
        Self { widths }
    }
}
/// Newtype over an APC-with-stats implementing `ApcCandidate`. Cost is
/// measured in total AIR width; `value_per_use` is the width saved per call.
#[derive(Serialize, Deserialize)]
pub struct OpenVmApcCandidate(
    ApcWithStats, (), u32, OvmApcStats>,
);
impl<'a, ISA: OpenVmISA> ApcCandidate> for OpenVmApcCandidate {
    fn create(apc_with_stats: AdapterApcWithStats>) -> Self {
        Self(apc_with_stats)
    }
    fn inner(&self) -> &AdapterApcWithStats> {
        &self.0
    }
    fn into_inner(self) -> AdapterApcWithStats> {
        self.0
    }
    fn cost_before_opt(&self) -> usize {
        self.0.stats().widths.before.total()
    }
    fn cost_after_opt(&self) -> usize {
        self.0.stats().widths.after.total()
    }
    fn value_per_use(&self) -> usize {
        self.cost_before_opt() - self.cost_after_opt()
    }
}
================================================
FILE: openvm/src/empirical_constraints.rs
================================================
use crate::isa::OpenVmISA;
use crate::program::CompiledProgram;
use
crate::trace_generation::do_with_cpu_trace;
use indicatif::ProgressBar;
use indicatif::ProgressStyle;
use itertools::Itertools;
use openvm_circuit::arch::VmCircuitConfig;
use openvm_sdk::StdIn;
use openvm_stark_backend::p3_field::FieldAlgebra;
use openvm_stark_backend::p3_maybe_rayon::prelude::IntoParallelIterator;
use openvm_stark_backend::p3_maybe_rayon::prelude::ParallelIterator;
use openvm_stark_sdk::openvm_stark_backend::p3_field::PrimeField32;
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_autoprecompiles::bus_map::BusType;
use powdr_autoprecompiles::empirical_constraints::BlockCell;
use powdr_autoprecompiles::empirical_constraints::Partition;
use powdr_autoprecompiles::empirical_constraints::{DebugInfo, EmpiricalConstraints};
use powdr_autoprecompiles::expression::AlgebraicEvaluator;
use powdr_autoprecompiles::expression::RowEvaluator;
use powdr_autoprecompiles::optimistic::config::optimistic_precompile_config;
use powdr_autoprecompiles::DegreeBound;
use powdr_openvm_bus_interaction_handler::bus_map::default_openvm_bus_map;
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::iter::once;
use crate::OriginalCompiledProgram;
/// A (segment index, in-segment timestamp) pair. Deriving `Ord` on this field
/// order yields segment-major ordering across the whole execution.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
struct Timestamp {
    // Note that the order of the fields matters for correct ordering.
    segment_idx: usize,
    value: u32,
}
/// A single row in the execution trace
#[derive(Debug)]
struct Row {
    /// The program counter value for this row
    pc: u32,
    /// The timestamp for this row (segment index, row index within segment)
    timestamp: Timestamp,
    /// The values of the cells in this row
    cells: Vec,
}
/// Materialized execution trace
#[derive(Default)]
struct Trace {
    /// The raw rows, in any order
    rows: Vec,
}
impl Trace {
    /// Groups rows by their program counter value. The order of rows within each PC group is arbitrary.
    fn rows_by_pc(&self) -> BTreeMap> {
        self.rows.iter().fold(BTreeMap::new(), |mut acc, row| {
            acc.entry(row.pc).or_insert(Vec::new()).push(row);
            acc
        })
    }
    /// Returns all rows sorted by their timestamp
    fn rows_sorted_by_time(&self) -> impl Iterator {
        self.rows.iter().sorted_by_key(|row| &row.timestamp)
    }
    /// Moves all rows out of `self`, leaving it empty.
    fn take(&mut self) -> Self {
        Self {
            rows: std::mem::take(&mut self.rows),
        }
    }
}
/// Executes the (unmodified, no-APC) program once per input and accumulates
/// empirical constraints — per-PC column ranges and per-block equivalence
/// classes — detected from the resulting CPU traces.
pub fn detect_empirical_constraints(
    program: &OriginalCompiledProgram,
    degree_bound: DegreeBound,
    inputs: Vec,
) -> EmpiricalConstraints {
    tracing::info!("Collecting empirical constraints...");
    let blocks = program.collect_basic_blocks();
    // block start PC -> number of instructions in the block
    let instruction_counts = blocks
        .iter()
        .map(|block| (block.start_pc, block.instructions.len()))
        .collect();
    // Collect trace, without any autoprecompiles.
    let program = program.compiled_program(degree_bound);
    let mut constraint_detector = ConstraintDetector::new(instruction_counts);
    let num_inputs = inputs.len();
    for (i, input) in inputs.into_iter().enumerate() {
        tracing::info!(" Processing input {} / {}", i + 1, num_inputs);
        detect_empirical_constraints_from_input(
            &program,
            i,
            input,
            degree_bound,
            &mut constraint_detector,
        );
    }
    tracing::info!("Done collecting empirical constraints.");
    constraint_detector.finalize()
}
/// Runs one input through the CPU tracer, materializing one `Row` per valid
/// (non-padding) row of every instruction-chip trace. Every `max_segments`
/// segments, the complete basic blocks collected so far are flushed to the
/// detector; the final (possibly partial) remainder is processed at the end.
fn detect_empirical_constraints_from_input(
    program: &CompiledProgram,
    input_index: usize,
    inputs: StdIn,
    degree_bound: DegreeBound,
    constraint_detector: &mut ConstraintDetector,
) {
    let mut trace = Trace::default();
    let mut debug_info = DebugInfo::default();
    let max_segments = optimistic_precompile_config().max_segments;
    do_with_cpu_trace(program, inputs, |seg_idx, vm, _pk, ctx| {
        let airs = program.vm_config.original.airs(degree_bound).unwrap();
        let global_airs = vm
            .config()
            .create_airs()
            .unwrap()
            .into_airs()
            .enumerate()
            .collect::>();
        for (air_id, proving_context) in &ctx.per_air {
            let main = proving_context.common_main.as_ref().unwrap();
            let air_name = global_airs[air_id].name();
            let Some(machine) = &airs.get_air_machine(&air_name) else {
                // air_name_to_machine only contains instruction AIRs, and we are only
                // interested in those here.
                continue;
            };
            assert!(
                proving_context.cached_mains.is_empty(),
                "Unexpected cached main in {air_name}."
            );
            // Find the execution bus interaction
            // This assumes there is exactly one, which is the case for instruction chips
            let execution_bus_interaction = machine
                .bus_interactions
                .iter()
                .find(|interaction| {
                    interaction.id
                        == default_openvm_bus_map()
                            .get_bus_id(&BusType::ExecutionBridge)
                            .unwrap()
                })
                .unwrap();
            if !debug_info.column_names_by_air_id.contains_key(air_id) {
                debug_info.column_names_by_air_id.insert(
                    *air_id,
                    machine.main_columns().map(|r| (*r.name).clone()).collect(),
                );
            }
            for row in main.row_slices() {
                // Create an evaluator over this row
                let evaluator = RowEvaluator::new(row);
                // Evaluate the execution bus interaction
                let execution = evaluator.eval_bus_interaction(execution_bus_interaction);
                // `is_valid` is the multiplicity
                let is_valid = execution.mult;
                if is_valid == BabyBear::ZERO {
                    // If `is_valid` is zero, this is a padding row
                    continue;
                }
                // Recover the values of the pc and timestamp
                let [pc, timestamp] = execution
                    .args
                    .map(|v| v.as_canonical_u32())
                    .collect_vec()
                    .try_into()
                    .unwrap();
                // Convert the row to u32s
                let row = row.iter().map(|v| v.as_canonical_u32()).collect();
                let row = Row {
                    cells: row,
                    pc,
                    timestamp: Timestamp {
                        segment_idx: seg_idx,
                        value: timestamp,
                    },
                };
                trace.rows.push(row);
                // Each PC must always be served by the same AIR.
                match debug_info.air_id_by_pc.entry(pc) {
                    Entry::Vacant(entry) => {
                        entry.insert(*air_id);
                    }
                    Entry::Occupied(existing) => {
                        assert_eq!(*existing.get(), *air_id);
                    }
                }
            }
        }
        if (seg_idx + 1) % max_segments == 0 {
            tracing::info!(
                " Reached segment {} of input {}, processing trace so far...",
                seg_idx + 1,
                input_index + 1
            );
            let (trace_to_process, remaining_trace) =
                take_complete_blocks(constraint_detector, trace.take());
            trace = remaining_trace;
            constraint_detector.process_trace(trace_to_process, debug_info.take());
        }
    })
    .unwrap();
    tracing::info!(
        " Finished execution of input {}, processing (remaining) trace...",
        input_index + 1
    );
    constraint_detector.process_trace(trace, debug_info);
}
/// Takes as many complete basic blocks from the trace as possible,
/// returning the taken trace and the remaining trace.
/// This is needed because ConstraintDetector::process_trace requires complete basic blocks,
/// but segmentation might happen within a basic block.
fn take_complete_blocks(constraint_detector: &ConstraintDetector, trace: Trace) -> (Trace, Trace) {
    // Find the latest timestamp that begins a basic block
    // NOTE(review): `unwrap` assumes the trace contains at least one
    // block-start row — confirm this holds for every flush interval.
    let latest_basic_block_beginning = trace
        .rows
        .iter()
        .filter(|row| constraint_detector.is_basic_block_start(row.pc as u64))
        .map(|row| &row.timestamp)
        .max()
        .unwrap()
        .clone();
    // Process all rows before that timestamp
    let (rows_to_process, remaining_rows): (Vec, Vec) = trace
        .rows
        .into_iter()
        .partition(|row| row.timestamp < latest_basic_block_beginning);
    let trace_to_process = Trace {
        rows: rows_to_process,
    };
    let remaining_trace = Trace {
        rows: remaining_rows,
    };
    (trace_to_process, remaining_trace)
}
/// Incrementally folds trace chunks into an `EmpiricalConstraints`
/// accumulator; see `process_trace` and `finalize`.
struct ConstraintDetector {
    /// Mapping from a basic block ID (= PC of the first instruction) to number
    /// of instructions in that block
    block_instruction_counts: HashMap,
    empirical_constraints: EmpiricalConstraints,
}
/// An instance of a basic block in the trace
struct ConcreteBlock<'a> {
    rows: Vec<&'a Row>,
}
impl<'a> ConcreteBlock<'a> {
    /// Partitions the block's cells into classes of equal value: cells that
    /// share the same value in this instance end up in the same class.
    fn equivalence_classes(&self) -> Partition {
        self.rows
            .iter()
            .enumerate()
            // Map each cell to a (value, (instruction_index, col_index)) pair
            .flat_map(|(instruction_index, row)| {
                row.cells
                    .iter()
                    .enumerate()
                    .map(|(col_index, v)| (*v, BlockCell::new(instruction_index, col_index)))
                    .collect::>()
            })
            // Group by value
            .into_group_map()
            .into_values()
            .collect()
    }
}
impl ConstraintDetector {
    pub fn new(block_instruction_counts: HashMap) -> Self {
        Self {
            block_instruction_counts,
            empirical_constraints: EmpiricalConstraints::default(),
        }
    }
    /// True iff `pc` is the first instruction of a known basic block.
    pub fn is_basic_block_start(&self, pc: u64) -> bool {
        self.block_instruction_counts.contains_key(&pc)
    }
    /// Consumes the detector, returning the accumulated constraints.
    pub fn finalize(self) -> EmpiricalConstraints {
        self.empirical_constraints
    }
    /// Folds one trace chunk (which must contain only complete basic blocks)
    /// into the accumulated empirical constraints.
    pub fn process_trace(&mut self, trace: Trace, debug_info: DebugInfo) {
        let pc_counts = trace
            .rows_by_pc()
            .into_iter()
            .map(|(pc, rows)| (pc, rows.len() as u64))
            .collect();
        // Compute empirical constraints from the current trace
        tracing::info!(" Detecting equivalence classes by block...");
        let equivalence_classes_by_block = self.generate_equivalence_classes_by_block(&trace);
        tracing::info!(" Detecting column ranges by PC...");
        let column_ranges_by_pc = self.detect_column_ranges_by_pc(trace);
        let new_empirical_constraints = EmpiricalConstraints {
            column_ranges_by_pc,
            equivalence_classes_by_block,
            debug_info,
            pc_counts,
        };
        // Combine the new empirical constraints and debug info with the existing ones
        self.empirical_constraints
            .combine_with(new_empirical_constraints);
    }
    fn detect_column_ranges_by_pc(&self, trace: Trace) -> BTreeMap> {
        // Map all column values to their range (1st and 99th percentile) for each pc
        trace
            .rows_by_pc()
            .into_par_iter()
            .map(|(pc, rows)| (pc, self.detect_column_ranges(&rows)))
            .collect()
    }
    /// Per-column (low, high) range over the given rows. NOTE: the range is
    /// the 1st/99th percentile, not min/max, so outliers are clipped.
    fn detect_column_ranges(&self, rows: &[&Row]) -> Vec<(u32, u32)> {
        for row in rows {
            // All rows for a given PC should be in the same chip
            assert_eq!(row.cells.len(), rows[0].cells.len());
        }
        (0..rows[0].cells.len())
            .map(|col_index| {
                let mut values = rows
                    .iter()
                    .map(|row| row.cells[col_index])
                    .collect::>();
                values.sort_unstable();
                let len = values.len();
                let p1_index = len / 100; // 1st percentile
                let p99_index = len * 99 / 100; // 99th percentile
                (values[p1_index], values[p99_index])
            })
            .collect()
    }
    /// For each basic block, intersects the per-instance equivalence classes
    /// over all instances of that block in the trace, keeping only equalities
    /// that held on every execution.
    fn generate_equivalence_classes_by_block(
        &self,
        trace: &Trace,
    ) -> BTreeMap> {
        tracing::info!(" Segmenting trace into blocks...");
        let blocks = self.get_blocks(trace);
        tracing::info!(" Finding equivalence classes...");
        let num_blocks = blocks.len();
        let pb = ProgressBar::new(num_blocks as u64).with_style(
            ProgressStyle::with_template("[{elapsed_precise}] [{bar:50}] {wide_msg}").unwrap(),
        );
        let partition = blocks
            .into_iter()
            .enumerate()
            .map(|(i, (block_id, block_instances))| {
                pb.set_message(format!(
                    "Block {} / {} ({} instances)",
                    i + 1,
                    num_blocks,
                    block_instances.len()
                ));
                // Build partitions for each block instance in parallel
                let partition_by_block_instance = block_instances
                    .into_par_iter()
                    .map(|block| block.equivalence_classes());
                // Intersect the equivalence classes across all instances in parallel
                let intersected = Partition::parallel_intersect(partition_by_block_instance);
                pb.inc(1);
                (block_id, intersected)
            })
            .collect();
        pb.finish_with_message("Done");
        partition
    }
    /// Segments a trace into basic blocks.
    /// Returns a mapping from block ID to all instances of that block in the trace.
    fn get_blocks<'a>(&self, trace: &'a Trace) -> BTreeMap>> {
        trace
            .rows_sorted_by_time()
            // take entire blocks from the rows
            .batching(|it| {
                let first = it.next()?;
                let block_id = first.pc as u64;
                if let Some(&count) = self.block_instruction_counts.get(&block_id) {
                    let rows = once(first).chain(it.take(count - 1)).collect_vec();
                    // Within a block the PC must advance by 4 per row
                    // (fixed-width instructions).
                    for (r1, r2) in rows.iter().tuple_windows() {
                        assert_eq!(r2.pc, r1.pc + 4);
                    }
                    Some(Some((block_id, ConcreteBlock { rows })))
                } else {
                    // Single instruction block, yield `None` to be filtered.
                    Some(None)
                }
            })
            // filter out single instruction blocks
            .flatten()
            // collect by start_pc
            .fold(Default::default(), |mut block_rows, (block_id, chunk)| {
                block_rows.entry(block_id).or_insert(Vec::new()).push(chunk);
                block_rows
            })
    }
}
#[cfg(test)]
mod tests {
    use powdr_autoprecompiles::equivalence_classes::EquivalenceClass;
    use super::*;
    /// Builds a `Trace` whose rows get consecutive timestamps in list order.
    fn make_trace(rows_by_time_with_pc: Vec<(u32, Vec)>) -> Trace {
        Trace {
            rows: rows_by_time_with_pc
                .into_iter()
                .enumerate()
                .map(|(clk, (pc, cells))| Row {
                    cells,
                    pc,
                    timestamp: Timestamp {
                        segment_idx: 0,
                        value: clk as u32,
                    },
                })
                .collect(),
        }
    }
    #[test]
    fn test_constraint_detector() {
        // Assume the following test program:
        // ADDI x1, x1, 1 // note how the second operand is always 1
        // BLT x1, x2, -4 // Note how the first operand is always equal to the result of the previous ADDI
        let instruction_counts = vec![(0, 2)].into_iter().collect();
        let mut detector = ConstraintDetector::new(instruction_counts);
        let trace1 = make_trace(vec![
            (0, vec![1, 0, 1]),  // ADDI: 0 + 1 = 1
            (4, vec![0, 1, 2]),  // BLT: 1 < 2 => PC = 0
            (0, vec![2, 1, 1]),  // ADDI: 1 + 1 = 2
            (4, vec![12, 2, 2]), // BLT: 2 >= 2 => PC = 8
        ]);
        detector.process_trace(trace1, DebugInfo::default());
        let empirical_constraints = detector.finalize();
        assert_eq!(
            empirical_constraints.column_ranges_by_pc.get(&0),
            // For the ADDI instruction, the second operand (col 2) is always 1; the other columns vary
            Some(&vec![(1, 2), (0, 1), (1, 1)])
        );
        assert_eq!(
            empirical_constraints.column_ranges_by_pc.get(&4),
            // For the BLT instruction, second operand (col 2) is always 2; the other columns vary
            Some(&vec![(0, 12), (1, 2), (2, 2)])
        );
        let equivalence_classes = empirical_constraints
            .equivalence_classes_by_block
            .get(&0)
            .unwrap();
        println!("Equivalence classes: {:?}", equivalence_classes);
        let expected: Partition<_> = once(
            // The result of the first instruction (col 0) is always equal to the
            // first operand of the second instruction (col 1)
            [BlockCell::new(0, 0), BlockCell::new(1, 1)]
                .into_iter()
                .collect::>(),
        )
        .collect();
        assert_eq!(*equivalence_classes, expected,);
    }
}
================================================
FILE: openvm/src/extraction_utils.rs
================================================
use std::collections::{BTreeMap, HashMap};
use std::marker::PhantomData;
use std::sync::{Arc, Mutex};
use itertools::Itertools;
use openvm_circuit::arch::{
    AirInventory, AirInventoryError, ExecutorInventory, ExecutorInventoryError, SystemConfig,
    VmCircuitConfig, VmExecutionConfig,
};
use openvm_circuit::system::memory::interface::MemoryInterfaceAirs;
use openvm_circuit_primitives::bitwise_op_lookup::SharedBitwiseOperationLookupChip;
use openvm_circuit_primitives::range_tuple::SharedRangeTupleCheckerChip;
use openvm_instructions::VmOpcode;
use openvm_stark_backend::air_builders::symbolic::SymbolicRapBuilder;
use openvm_stark_backend::interaction::fri_log_up::find_interaction_chunks;
use openvm_stark_backend::{
    air_builders::symbolic::SymbolicConstraints, config::StarkGenericConfig, rap::AnyRap,
};
use openvm_stark_sdk::config::{
    baby_bear_poseidon2::{config_from_perm, default_perm},
    fri_params::SecurityParameters,
};
use openvm_stark_sdk::p3_baby_bear::{self, BabyBear};
use powdr_autoprecompiles::bus_map::BusType;
use powdr_autoprecompiles::evaluation::AirStats;
use powdr_autoprecompiles::expression::try_convert;
use powdr_autoprecompiles::symbolic_machine::SymbolicMachine;
use powdr_autoprecompiles::{Apc, DegreeBound, InstructionHandler};
use powdr_openvm_bus_interaction_handler::bus_map::{BusMap, OpenVmBusType};
use serde::{Deserialize, Serialize};
use std::iter::Sum;
use std::ops::Deref;
use std::ops::{Add, Sub};
use std::sync::MutexGuard;
use crate::customize_exe::Instr;
use crate::isa::{OpenVmISA, OriginalCpuChipComplex};
use crate::powdr_extension::executor::RecordArenaDimension;
use crate::utils::openvm_bus_interaction_to_powdr;
use crate::utils::symbolic_to_algebraic;
use crate::utils::UnsupportedOpenVmReferenceError;
use
crate::AirMetrics;
use crate::{air_builder::AirKeygenBuilder, BabyBearSC};
// TODO: Use ` as FieldExtensionAlgebra>>::D` instead after fixing p3 dependency
const EXT_DEGREE: usize = 4;
#[derive(Clone, Serialize, Deserialize)]
pub struct OriginalAirs {
    /// The degree bound used when building the airs
    pub(crate) degree_bound: DegreeBound,
    /// Maps a VM opcode to the name of the (unique) AIR that implements it.
    pub(crate) opcode_to_air: HashMap,
    /// Maps an AIR name to its symbolic machine and metrics.
    /// Note that this map only contains AIRs that implement instructions.
    pub(crate) air_name_to_machine: BTreeMap, AirMetrics)>,
    _marker: PhantomData,
}
impl InstructionHandler for OriginalAirs {
    type Field = F;
    type Instruction = Instr;
    type AirId = String;
    /// Looks up the AIR (name and symbolic machine) that implements the given
    /// instruction's opcode. Panics if the opcode was never registered.
    fn get_instruction_air_and_id(
        &self,
        instruction: &Self::Instruction,
    ) -> (Self::AirId, &SymbolicMachine) {
        let id = self
            .opcode_to_air
            .get(&instruction.inner.opcode)
            .unwrap()
            .clone();
        let air = &self.air_name_to_machine.get(&id).unwrap().0;
        (id, air)
    }
    fn get_instruction_air_stats(&self, instruction: &Self::Instruction) -> AirStats {
        self.get_instruction_metrics(instruction.inner.opcode)
            .map(|metrics| metrics.clone().into())
            .unwrap()
    }
    fn degree_bound(&self) -> DegreeBound {
        self.degree_bound
    }
}
impl OriginalAirs {
    /// Registers `opcode` as implemented by `air_name`. The symbolic machine
    /// is built (via the `machine` closure) only the first time an AIR name is
    /// seen, so multiple opcodes can share one AIR cheaply.
    /// Panics if the opcode is already registered.
    pub fn insert_opcode(
        &mut self,
        opcode: VmOpcode,
        air_name: String,
        machine: impl Fn(
            DegreeBound,
        )
            -> Result<(SymbolicMachine, AirMetrics), UnsupportedOpenVmReferenceError>,
    ) -> Result<(), UnsupportedOpenVmReferenceError> {
        if self.opcode_to_air.contains_key(&opcode) {
            panic!("Opcode {opcode} already exists");
        }
        if !self.air_name_to_machine.contains_key(&air_name) {
            let machine_instance = machine(self.degree_bound)?;
            self.air_name_to_machine
                .insert(air_name.clone(), machine_instance);
        }
        self.opcode_to_air.insert(opcode, air_name);
        Ok(())
    }
    pub fn get_instruction_metrics(&self, opcode: VmOpcode) -> Option<&AirMetrics> {
        self.opcode_to_air.get(&opcode).and_then(|air_name| {
            self.air_name_to_machine
                .get(air_name)
                .map(|(_, metrics)| metrics)
        })
    }
    /// All opcodes that have a registered AIR.
    pub fn allow_list(&self) -> Vec {
        self.opcode_to_air.keys().cloned().collect()
    }
    pub fn airs_by_name(&self) -> impl Iterator)> {
        self.air_name_to_machine
            .iter()
            .map(|(name, (machine, _))| (name, machine))
    }
    /// Empty registry carrying only the degree bound.
    fn with_degree_bound(degree_bound: DegreeBound) -> Self {
        Self {
            degree_bound,
            opcode_to_air: Default::default(),
            air_name_to_machine: Default::default(),
            _marker: PhantomData,
        }
    }
    pub fn get_air_machine(&self, air_name: &str) -> Option<&SymbolicMachine> {
        self.air_name_to_machine
            .get(air_name)
            .map(|(machine, _)| machine)
    }
}
/// Computes, per AIR name, the record-arena heights (real vs. dummy) and width
/// needed for a single call of the given APC: each instruction contributes one
/// row, counted as "dummy" when its substitution list is empty.
pub fn record_arena_dimension_by_air_name_per_apc_call(
    apc: &Apc, (), u32>,
    air_by_opcode_id: &OriginalAirs,
) -> BTreeMap {
    apc.instructions()
        .map(|instr| &instr.inner.opcode)
        .zip_eq(apc.subs.iter().map(|sub| sub.is_empty()))
        .fold(
            BTreeMap::new(),
            |mut acc, (opcode, should_use_dummy_arena)| {
                let air_name = air_by_opcode_id.opcode_to_air.get(opcode).unwrap();
                let entry = acc.entry(air_name.clone()).or_insert_with(|| {
                    let (_, air_metrics) =
                        air_by_opcode_id.air_name_to_machine.get(air_name).unwrap();
                    RecordArenaDimension {
                        real_height: 0,
                        width: air_metrics.widths.main,
                        dummy_height: 0,
                    }
                });
                if should_use_dummy_arena {
                    entry.dummy_height += 1;
                } else {
                    entry.real_height += 1;
                }
                acc
            },
        )
}
type ChipComplex = OriginalCpuChipComplex;
// `None` until first use; see `OriginalVmConfig::chip_complex`.
type LazyChipComplex = Option;
type CachedChipComplex = Arc>;
/// Mutex guard that dereferences to the lazily-initialized chip complex.
/// Panics on deref if the complex was not initialized (see `chip_complex`,
/// which always initializes before handing out a guard).
pub struct ChipComplexGuard<'a> {
    guard: MutexGuard<'a, LazyChipComplex>,
}
impl<'a> Deref for ChipComplexGuard<'a> {
    type Target = ChipComplex;
    fn deref(&self) -> &Self::Target {
        self.guard
            .as_ref()
            .expect("Chip complex should be initialized")
    }
}
/// The original (pre-specialization) VM config plus a lazily built, cached
/// chip complex (not serialized: `#[serde(skip)]`).
#[derive(Serialize, Deserialize, Clone)]
pub struct OriginalVmConfig {
    pub config: ISA::Config,
    #[serde(skip)]
    pub chip_complex: CachedChipComplex,
}
impl VmCircuitConfig for OriginalVmConfig {
    fn create_airs(&self) -> Result, AirInventoryError> {
        self.config.create_airs()
    }
}
impl VmExecutionConfig for OriginalVmConfig {
    type Executor = >::Executor;
    fn create_executors(
        &self,
    ) -> Result, ExecutorInventoryError> {
        self.config.create_executors()
    }
}
impl AsRef for OriginalVmConfig {
    fn as_ref(&self) -> &SystemConfig {
        self.config.as_ref()
    }
}
impl AsMut for OriginalVmConfig {
    fn as_mut(&mut self) -> &mut SystemConfig {
        self.config.as_mut()
    }
}
impl OriginalVmConfig {
    pub fn new(config: ISA::Config) -> Self {
        Self {
            config,
            chip_complex: Default::default(),
        }
    }
    pub fn config(&self) -> &ISA::Config {
        &self.config
    }
    /// Mutable access to the config. Any mutation may change the resulting
    /// chips, so the cached chip complex is invalidated first.
    pub fn config_mut(&mut self) -> &mut ISA::Config {
        let mut guard = self.chip_complex.lock().expect("Mutex poisoned");
        *guard = None;
        &mut self.config
    }
    /// Returns the cached chip complex, building it on first access.
    pub fn chip_complex(&self) -> ChipComplexGuard<'_> {
        let mut guard = self.chip_complex.lock().expect("Mutex poisoned");
        if guard.is_none() {
            let airs = self
                .config
                .create_airs()
                .expect("Failed to create air inventory");
            let complex = ISA::create_original_chip_complex(&self.config, airs)
                .expect("Failed to create chip complex");
            *guard = Some(complex);
        }
        ChipComplexGuard { guard }
    }
    /// Builds an `OriginalAirs` registry for every allowed opcode that has an
    /// executor, converting each OpenVM AIR's constraints and bus interactions
    /// into powdr's symbolic-machine representation. Fails with
    /// `UnsupportedOpenVmReferenceError` if a constraint cannot be converted.
    pub fn airs(
        &self,
        degree_bound: DegreeBound,
    ) -> Result, UnsupportedOpenVmReferenceError> {
        let chip_complex = &self.chip_complex();
        let chip_inventory = &chip_complex.inventory;
        let executor_inventory = self.create_executors().unwrap();
        let instruction_allowlist = ISA::allowed_opcodes();
        instruction_allowlist
            .into_iter()
            // Opcodes without an executor are silently skipped.
            .filter_map(|op| {
                executor_inventory
                    .instruction_lookup
                    .get(&op)
                    .map(|id| (op, *id as usize))
            })
            .map(|(op, executor_id)| {
                let insertion_index = chip_inventory.executor_idx_to_insertion_idx[executor_id];
                let air_ref = &chip_inventory.airs().ext_airs()[insertion_index];
                (op, air_ref)
            })
            .try_fold(
                OriginalAirs::with_degree_bound(degree_bound),
                |mut airs, (op, air_ref)| {
                    airs.insert_opcode(op, air_ref.name(), |degree_bound| {
                        let columns = get_columns(air_ref.clone());
                        let constraints = get_constraints(air_ref.clone());
                        let metrics = get_air_metrics(air_ref.clone(), degree_bound.identities);
                        let powdr_exprs = constraints
                            .constraints
                            .iter()
                            .map(|expr| try_convert(symbolic_to_algebraic(expr, &columns)))
                            .collect::, _>>()?;
                        let powdr_bus_interactions = constraints
                            .interactions
                            .iter()
                            .map(|expr| openvm_bus_interaction_to_powdr(expr, &columns))
                            .collect::>()?;
                        Ok((
                            SymbolicMachine {
                                constraints: powdr_exprs.into_iter().map(Into::into).collect(),
                                bus_interactions: powdr_bus_interactions,
                                derived_columns: vec![],
                            },
                            metrics,
                        ))
                    })?;
                    Ok(airs)
                },
            )
    }
    /// Assembles the bus-id -> bus-type map from the system AIRs (execution
    /// bridge, memory, program, range buses) plus any shared bitwise-lookup
    /// and tuple-range-checker periphery chips found in the inventory.
    pub fn bus_map(&self) -> BusMap {
        let chip_complex = self.chip_complex();
        let inventory = &chip_complex.inventory;
        let shared_bitwise_lookup = inventory
            .find_chip::>()
            .next();
        let shared_range_tuple_checker = inventory
            .find_chip::>()
            .next();
        let system_air_inventory = inventory.airs().system();
        let connector_air = system_air_inventory.connector;
        let memory_air = &system_air_inventory.memory;
        BusMap::from_id_type_pairs(
            {
                [
                    (
                        connector_air.execution_bus.index(),
                        BusType::ExecutionBridge,
                    ),
                    (
                        // TODO: make getting memory bus index a helper function
                        match &memory_air.interface {
                            MemoryInterfaceAirs::Volatile { boundary } => {
                                boundary.memory_bus.inner.index
                            }
                            MemoryInterfaceAirs::Persistent { boundary, .. } => {
                                boundary.memory_bus.inner.index
                            }
                        },
                        BusType::Memory,
                    ),
                    (connector_air.program_bus.index(), BusType::PcLookup),
                    (
                        connector_air.range_bus.index(),
                        BusType::Other(OpenVmBusType::VariableRangeChecker),
                    ),
                ]
                .into_iter()
            }
            .chain(shared_bitwise_lookup.into_iter().map(|chip| {
                (
                    chip.bus().inner.index,
                    BusType::Other(OpenVmBusType::BitwiseLookup),
                )
            }))
            .chain(shared_range_tuple_checker.into_iter().map(|chip| {
                (
                    chip.bus().inner.index,
                    BusType::Other(OpenVmBusType::TupleRangeChecker(chip.bus().sizes)),
                )
            }))
            .map(|(id, bus_type)| (id as u64, bus_type)),
        )
    }
    /// Metrics for every AIR in the chip inventory, keyed by AIR name.
    pub fn chip_inventory_air_metrics(&self, max_degree: usize) -> HashMap {
        let inventory = &self.chip_complex().inventory;
        inventory
            .airs()
            .ext_airs()
            .iter()
            .map(|air| {
                let name = air.name();
                let metrics = get_air_metrics(air.clone(), max_degree);
                (name, metrics)
            })
            .collect()
    }
}
/// Column names of an AIR; falls back to `unknown_{i}` placeholders when the
/// AIR does not report names.
pub fn get_columns(air: Arc>) -> Vec> {
    let width = air.width();
    air.columns()
        .inspect(|columns| {
            assert_eq!(columns.len(), width);
        })
        .unwrap_or_else(|| (0..width).map(|i| format!("unknown_{i}")).collect())
        .into_iter()
        .map(Arc::new)
        .collect()
}
pub fn get_name(air: Arc>) -> String {
    air.name()
}
/// Extracts the symbolic constraints of an AIR via a symbolic RAP builder
/// (no degree limit).
pub fn get_constraints(
    air: Arc>,
) -> SymbolicConstraints {
    let builder = symbolic_builder_with_degree(air, None);
    builder.constraints()
}
/// Computes width/constraint/interaction metrics for an AIR. The log-up width
/// is `(number of interaction partitions + 1) * EXT_DEGREE`.
pub fn get_air_metrics(air: Arc>, max_degree: usize) -> AirMetrics {
    let main = air.width();
    let symbolic_rap_builder = symbolic_builder_with_degree(air, Some(max_degree));
    let preprocessed = symbolic_rap_builder.width().preprocessed.unwrap_or(0);
    let SymbolicConstraints {
        constraints,
        interactions,
    } = symbolic_rap_builder.constraints();
    let log_up = (find_interaction_chunks(&interactions, max_degree)
        .interaction_partitions()
        .len()
        + 1)
        * EXT_DEGREE;
    AirMetrics {
        widths: AirWidths {
            preprocessed,
            main,
            log_up,
        },
        constraints: constraints.len(),
        bus_interactions: interactions.len(),
    }
}
pub fn symbolic_builder_with_degree(
    air: Arc>,
    max_constraint_degree: Option,
) -> SymbolicRapBuilder {
let perm = default_perm(); let security_params = SecurityParameters::standard_fast(); let config = config_from_perm(&perm, security_params); let air_keygen_builder = AirKeygenBuilder::new(config.pcs(), air); air_keygen_builder.get_symbolic_builder(max_constraint_degree) } #[derive(Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq, Debug)] pub struct AirWidths { pub preprocessed: usize, pub main: usize, pub log_up: usize, } impl Add for AirWidths { type Output = AirWidths; fn add(self, rhs: AirWidths) -> AirWidths { AirWidths { preprocessed: self.preprocessed + rhs.preprocessed, main: self.main + rhs.main, log_up: self.log_up + rhs.log_up, } } } impl Sub for AirWidths { type Output = AirWidths; fn sub(self, rhs: AirWidths) -> AirWidths { AirWidths { preprocessed: self.preprocessed - rhs.preprocessed, main: self.main - rhs.main, log_up: self.log_up - rhs.log_up, } } } impl Sum for AirWidths { fn sum>(iter: I) -> AirWidths { iter.fold(AirWidths::default(), Add::add) } } impl AirWidths { pub fn total(&self) -> usize { self.preprocessed + self.main + self.log_up } } impl std::fmt::Display for AirWidths { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "Total Width: {} (Preprocessed: {} Main: {}, Log Up: {})", self.preprocessed + self.main + self.log_up, self.preprocessed, self.main, self.log_up ) } } #[derive(Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq, Debug)] pub struct AirWidthsDiff { pub before: AirWidths, pub after: AirWidths, } impl AirWidthsDiff { pub fn new(before: AirWidths, after: AirWidths) -> Self { Self { before, after } } pub fn columns_saved(&self) -> AirWidths { self.before - self.after } } impl Add for AirWidthsDiff { type Output = AirWidthsDiff; fn add(self, rhs: AirWidthsDiff) -> AirWidthsDiff { AirWidthsDiff { before: self.before + rhs.before, after: self.after + rhs.after, } } } impl Sum for AirWidthsDiff { fn sum>(iter: I) -> AirWidthsDiff { let zero = 
AirWidthsDiff::new(AirWidths::default(), AirWidths::default()); iter.fold(zero, Add::add) } } ================================================ FILE: openvm/src/isa.rs ================================================ use std::collections::{BTreeSet, HashSet}; use std::sync::Arc; use openvm_circuit::arch::{ AirInventory, AirInventoryError, AnyEnum, ChipInventory, ChipInventoryError, DenseRecordArena, Executor, InterpreterExecutor, MatrixRecordArena, MeteredExecutor, PreflightExecutor, VmBuilder, VmChipComplex, VmCircuitExtension, VmConfig, VmExecutionConfig, }; #[cfg(feature = "cuda")] use openvm_circuit::system::cuda::SystemChipInventoryGPU; use openvm_circuit::system::SystemChipInventory; #[cfg(feature = "cuda")] use openvm_cuda_backend::engine::GpuBabyBearPoseidon2Engine; #[cfg(feature = "cuda")] use openvm_cuda_backend::prover_backend::GpuBackend; use openvm_instructions::{instruction::Instruction, VmOpcode}; use openvm_sdk::config::TranspilerConfig; use openvm_stark_backend::{config::Val, p3_field::PrimeField32, prover::cpu::CpuBackend}; use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Engine; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_riscv_elf::debug_info::SymbolTable; use crate::powdr_extension::trace_generator::cpu::SharedPeripheryChipsCpu; #[cfg(feature = "cuda")] use crate::powdr_extension::trace_generator::SharedPeripheryChipsGpu; use crate::program::OriginalCompiledProgram; use crate::{BabyBearSC, Instr, SpecializedExecutor}; pub type OriginalCpuChipComplex = VmChipComplex< BabyBearSC, MatrixRecordArena>, CpuBackend, SystemChipInventory, >; pub type OriginalCpuChipInventory = ChipInventory>, CpuBackend>; #[cfg(feature = "cuda")] pub type OriginalGpuChipComplex = VmChipComplex; #[cfg(feature = "cuda")] pub type OriginalGpuChipInventory = ChipInventory; pub type IsaApc = Arc, (), u32>>; pub trait OpenVmISA: Send + Sync + Clone + 'static + Default { /// The original linked program, for example, an elf for riscv. 
It must allow recovering the jump destinations. type LinkedProgram<'a>; type Executor: AnyEnum + InterpreterExecutor + Executor + MeteredExecutor + PreflightExecutor> + PreflightExecutor + Send + Sync + Into>; type Config: VmConfig + VmExecutionConfig> + TranspilerConfig; type CpuBuilder: Clone + Default + VmBuilder< BabyBearPoseidon2Engine, VmConfig = Self::Config, SystemChipInventory = SystemChipInventory, RecordArena = MatrixRecordArena>, >; #[cfg(feature = "cuda")] type GpuBuilder: Clone + Default + VmBuilder< GpuBabyBearPoseidon2Engine, VmConfig = Self::Config, SystemChipInventory = SystemChipInventoryGPU, RecordArena = DenseRecordArena, >; fn create_dummy_airs>( config: &Self::Config, shared_chips: E, ) -> Result, AirInventoryError>; fn create_original_chip_complex( config: &Self::Config, airs: AirInventory, ) -> Result; fn create_dummy_chip_complex_cpu( config: &Self::Config, circuit: AirInventory, shared_chips: SharedPeripheryChipsCpu, ) -> Result; #[cfg(feature = "cuda")] fn create_dummy_chip_complex_gpu( config: &Self::Config, circuit: AirInventory, shared_chips: SharedPeripheryChipsGpu, ) -> Result; /// The set of branching opcodes fn branching_opcodes() -> HashSet; /// The set of opcodes which are allowed to be put into autoprecompiles fn allowed_opcodes() -> HashSet; /// Format an instruction of this ISA fn format(instruction: &Instruction) -> String; fn get_symbol_table<'a>(program: &Self::LinkedProgram<'a>) -> SymbolTable; /// Given an original program, return the pcs which correspond to jump destinations fn get_jump_destinations(original_program: &OriginalCompiledProgram) -> BTreeSet; } ================================================ FILE: openvm/src/lib.rs ================================================ #![cfg_attr(feature = "tco", allow(internal_features))] #![cfg_attr(feature = "tco", allow(incomplete_features))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] #![cfg_attr(feature = "tco", feature(core_intrinsics))] use 
openvm_circuit::arch::{ AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutorInventory, ExecutorInventoryError, InitFileGenerator, MatrixRecordArena, RowMajorMatrixArena, SystemConfig, VmBuilder, VmChipComplex, VmCircuitConfig, VmCircuitExtension, VmExecutionConfig, VmProverExtension, }; use openvm_circuit::system::SystemChipInventory; use openvm_circuit::{circuit_derive::Chip, derive::AnyEnum}; use openvm_circuit_derive::{ AotExecutor, AotMeteredExecutor, Executor, MeteredExecutor, PreflightExecutor, }; use openvm_sdk::config::TranspilerConfig; use openvm_sdk::GenericSdk; use openvm_sdk::{ config::{AppConfig, DEFAULT_APP_LOG_BLOWUP}, StdIn, }; use openvm_stark_backend::config::{StarkGenericConfig, Val}; use openvm_stark_backend::engine::StarkEngine; use openvm_stark_backend::prover::cpu::{CpuBackend, CpuDevice}; use openvm_stark_backend::prover::hal::ProverBackend; use openvm_stark_sdk::config::{ baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine}, FriParameters, }; use openvm_stark_sdk::openvm_stark_backend::p3_field::PrimeField32; use openvm_stark_sdk::p3_baby_bear::BabyBear; use openvm_transpiler::transpiler::Transpiler; use powdr_autoprecompiles::evaluation::AirStats; use powdr_autoprecompiles::execution_profile::ExecutionProfile; use powdr_autoprecompiles::DegreeBound; use powdr_autoprecompiles::{execution_profile::execution_profile, PowdrConfig}; use powdr_extension::PowdrExtension; use serde::{Deserialize, Serialize}; use std::iter::Sum; use std::marker::PhantomData; use std::ops::Add; use std::path::Path; use crate::isa::OpenVmISA; use crate::powdr_extension::chip::PowdrAir; pub use crate::program::Prog; pub use crate::program::{CompiledProgram, OriginalCompiledProgram}; use crate::extraction_utils::AirWidthsDiff; use crate::extraction_utils::{get_air_metrics, AirWidths, OriginalVmConfig}; use crate::powdr_extension::{PowdrExtensionExecutor, PowdrPrecompile}; mod air_builder; pub mod cuda_abi; pub mod 
empirical_constraints; pub mod extraction_utils; pub mod program; pub mod trace_generation; pub mod utils; pub use powdr_openvm_bus_interaction_handler::bus_map; #[cfg(feature = "test-utils")] pub mod test_utils; pub use crate::empirical_constraints::detect_empirical_constraints; pub type BabyBearSC = BabyBearPoseidon2Config; cfg_if::cfg_if! { if #[cfg(feature = "cuda")] { pub use openvm_cuda_backend::engine::GpuBabyBearPoseidon2Engine; pub use openvm_native_circuit::NativeGpuBuilder; pub type PowdrSdkGpu = GenericSdk, NativeGpuBuilder>; pub type PowdrExecutionProfileSdkGpu = GenericSdk::GpuBuilder, NativeGpuBuilder>; pub use openvm_circuit::system::cuda::{extensions::SystemGpuBuilder, SystemChipInventoryGPU}; pub use openvm_sdk::config::SdkVmGpuBuilder; pub use openvm_cuda_backend::prover_backend::GpuBackend; pub use openvm_circuit_primitives::bitwise_op_lookup::BitwiseOperationLookupChipGPU; pub use openvm_circuit_primitives::range_tuple::RangeTupleCheckerChipGPU; pub use openvm_circuit_primitives::var_range::VariableRangeCheckerChipGPU; pub use openvm_cuda_backend::base::DeviceMatrix; pub use openvm_circuit::arch::DenseRecordArena; } } use openvm_circuit_primitives::bitwise_op_lookup::{ BitwiseOperationLookupAir, SharedBitwiseOperationLookupChip, }; use openvm_circuit_primitives::range_tuple::{RangeTupleCheckerAir, SharedRangeTupleCheckerChip}; use openvm_circuit_primitives::var_range::{ SharedVariableRangeCheckerChip, VariableRangeCheckerAir, }; use openvm_native_circuit::NativeCpuBuilder; pub type PowdrSdkCpu = GenericSdk, NativeCpuBuilder>; pub type PowdrExecutionProfileSdkCpu = GenericSdk::CpuBuilder, NativeCpuBuilder>; pub const DEFAULT_OPENVM_DEGREE_BOUND: usize = 2 * DEFAULT_APP_LOG_BLOWUP + 1; pub const DEFAULT_DEGREE_BOUND: DegreeBound = DegreeBound { identities: DEFAULT_OPENVM_DEGREE_BOUND, bus_interactions: DEFAULT_OPENVM_DEGREE_BOUND - 1, }; pub fn default_powdr_openvm_config(apc: u64, skip: u64) -> PowdrConfig { PowdrConfig::new(apc, skip, 
DEFAULT_DEGREE_BOUND) } pub fn format_fe(v: F) -> String { let v = v.as_canonical_u32(); if v < F::ORDER_U32 / 2 { format!("{v}") } else { format!("-{}", F::ORDER_U32 - v) } } /// We do not use the transpiler, instead we customize an already transpiled program pub mod customize_exe; pub use customize_exe::{customize, BabyBearOpenVmApcAdapter, Instr, POWDR_OPCODE}; // A module for our extension pub mod isa; pub mod powdr_extension; /// A custom VmConfig that wraps the SdkVmConfig, adding our custom extension. #[derive(Serialize, Deserialize, Clone)] #[serde(bound = "")] pub struct SpecializedConfig { pub original: OriginalVmConfig, pub powdr: PowdrExtension, } #[cfg(feature = "cuda")] #[derive(Default, Clone)] pub struct SpecializedConfigGpuBuilder { _marker: PhantomData, } #[cfg(feature = "cuda")] impl VmBuilder for SpecializedConfigGpuBuilder { type VmConfig = SpecializedConfig; type SystemChipInventory = SystemChipInventoryGPU; type RecordArena = DenseRecordArena; fn create_chip_complex( &self, config: &SpecializedConfig, circuit: AirInventory, ) -> Result< VmChipComplex, ChipInventoryError, > { let mut chip_complex = VmBuilder::::create_chip_complex( &::GpuBuilder::default(), &config.original.config, circuit, )?; let inventory = &mut chip_complex.inventory; VmProverExtension::::extend_prover( &PowdrGpuProverExt::::default(), &config.powdr, inventory, )?; Ok(chip_complex) } } #[derive(Default, Clone)] pub struct SpecializedConfigCpuBuilder { _marker: PhantomData, } impl VmBuilder for SpecializedConfigCpuBuilder where E: StarkEngine, PD = CpuDevice>, ISA::CpuBuilder: VmBuilder< E, VmConfig = ISA::Config, SystemChipInventory = SystemChipInventory, RecordArena = MatrixRecordArena>, >, { type VmConfig = SpecializedConfig; type SystemChipInventory = SystemChipInventory; type RecordArena = MatrixRecordArena>; fn create_chip_complex( &self, config: &SpecializedConfig, circuit: AirInventory, ) -> Result< VmChipComplex, ChipInventoryError, > { let mut chip_complex = 
VmBuilder::::create_chip_complex( &::CpuBuilder::default(), &config.original.config, circuit, )?; let inventory = &mut chip_complex.inventory; VmProverExtension::::extend_prover( &PowdrCpuProverExt::::default(), &config.powdr, inventory, )?; Ok(chip_complex) } } #[cfg(feature = "cuda")] #[derive(Default)] struct PowdrGpuProverExt { _marker: PhantomData, } #[cfg(feature = "cuda")] impl VmProverExtension> for PowdrGpuProverExt { fn extend_prover( &self, extension: &PowdrExtension, inventory: &mut ChipInventory, ) -> Result<(), ChipInventoryError> { use std::sync::Arc; // TODO: here we make assumptions about the existence of some chips in the periphery. Make this more flexible use crate::powdr_extension::trace_generator::cuda::PowdrPeripheryInstancesGpu; let bitwise_lookup = inventory .find_chip::>>() .next() .cloned(); let range_checker = inventory .find_chip::>() .next() .unwrap(); let tuple_range_checker = inventory .find_chip::>>() .next() .cloned(); // Create the shared chips and the dummy shared chips let shared_chips_pair = PowdrPeripheryInstancesGpu::new( range_checker.clone(), bitwise_lookup, tuple_range_checker, get_periphery_bus_ids(inventory), ); for precompile in &extension.precompiles { use crate::powdr_extension::chip::PowdrChipGpu; inventory.next_air::>()?; let chip = PowdrChipGpu::new( precompile.clone(), extension.airs.clone(), extension.base_config.clone(), shared_chips_pair.clone(), ); inventory.add_executor_chip(chip); } Ok(()) } } #[derive(Clone)] pub struct PeripheryBusIds { pub range_checker: u16, pub bitwise_lookup: Option, pub tuple_range_checker: Option, } #[derive(Clone, Default)] pub struct PowdrCpuProverExt { _marker: PhantomData, } impl VmProverExtension> for PowdrCpuProverExt where E: StarkEngine, PD = CpuDevice>, RA: RowMajorMatrixArena, { fn extend_prover( &self, extension: &PowdrExtension, inventory: &mut ChipInventory<::SC, RA, ::PB>, ) -> Result<(), ChipInventoryError> { // TODO: here we make assumptions about the existence of some 
chips in the periphery. Make this more flexible use crate::powdr_extension::trace_generator::cpu::PowdrPeripheryInstancesCpu; let bitwise_lookup = inventory .find_chip::>() .next() .cloned(); let range_checker = inventory .find_chip::() .next() .unwrap(); let tuple_range_checker = inventory .find_chip::>() .next() .cloned(); // Create the shared chips and the dummy shared chips let shared_chips_pair = PowdrPeripheryInstancesCpu::new( range_checker.clone(), bitwise_lookup, tuple_range_checker, get_periphery_bus_ids(inventory), ); for precompile in &extension.precompiles { use crate::powdr_extension::chip::PowdrChipCpu; inventory.next_air::>()?; let chip = PowdrChipCpu::new( precompile.clone(), extension.airs.clone(), extension.base_config.clone(), shared_chips_pair.clone(), ); inventory.add_executor_chip(chip); } Ok(()) } } // Helper function to get the periphery bus ids from the `AirInventory`. // This is the most robust method because bus ids are assigned at air creation time. fn get_periphery_bus_ids(inventory: &ChipInventory) -> PeripheryBusIds where SC: StarkGenericConfig, PB: ProverBackend, { let air_inventory = inventory.airs(); let range_checker_bus_id = air_inventory .find_air::() .next() .unwrap() .bus .inner .index; let bitwise_lookup_bus_id = air_inventory .find_air::>() .next() .map(|air| air.bus.inner.index); let tuple_range_checker_bus_id = air_inventory .find_air::>() .next() .map(|air| air.bus.inner.index); PeripheryBusIds { range_checker: range_checker_bus_id, bitwise_lookup: bitwise_lookup_bus_id, tuple_range_checker: tuple_range_checker_bus_id, } } impl TranspilerConfig for SpecializedConfig { fn transpiler(&self) -> Transpiler { self.original.config().transpiler() } } // For generation of the init file, we delegate to the underlying SdkVmConfig. 
impl InitFileGenerator for SpecializedConfig { fn generate_init_file_contents(&self) -> Option { self.original.config().generate_init_file_contents() } fn write_to_init_file( &self, manifest_dir: &Path, init_file_name: Option<&str>, ) -> std::io::Result<()> { self.original .config() .write_to_init_file(manifest_dir, init_file_name) } } impl AsRef for SpecializedConfig { fn as_ref(&self) -> &SystemConfig { self.original.as_ref() } } impl AsMut for SpecializedConfig { fn as_mut(&mut self) -> &mut SystemConfig { self.original.as_mut() } } #[allow(clippy::large_enum_variant)] #[derive( AnyEnum, Chip, Executor, MeteredExecutor, AotExecutor, AotMeteredExecutor, PreflightExecutor, )] pub enum SpecializedExecutor { #[any_enum] OriginalExecutor(ISA::Executor), #[any_enum] PowdrExecutor(PowdrExtensionExecutor), } // We implement `From` by hand because we cannot prove that `ISA::Executor != PowdrExtensionExecutor` impl From> for SpecializedExecutor { fn from(value: PowdrExtensionExecutor) -> Self { Self::PowdrExecutor(value) } } // TODO: derive VmCircuitConfig, currently not possible because we don't have SC/F everywhere // Also `start_new_extension` is normally only used in derive impl VmCircuitConfig for SpecializedConfig { fn create_airs(&self) -> Result, AirInventoryError> { let mut inventory = self.original.create_airs()?; inventory.start_new_extension(); self.powdr.extend_circuit(&mut inventory)?; Ok(inventory) } } impl VmExecutionConfig for SpecializedConfig { type Executor = SpecializedExecutor; fn create_executors( &self, ) -> Result, ExecutorInventoryError> { let mut inventory = self.original.create_executors()?.transmute(); inventory = inventory.extend(&self.powdr)?; Ok(inventory) } } impl SpecializedConfig { pub fn new( base_config: OriginalVmConfig, precompiles: Vec>, degree_bound: DegreeBound, ) -> Self { let airs = base_config.airs(degree_bound).expect("Failed to convert the AIR of an OpenVM instruction, even after filtering by the blacklist!"); let bus_map = 
base_config.bus_map(); let powdr_extension = PowdrExtension::new(precompiles, base_config.clone(), bus_map, airs); Self { original: base_config, powdr: powdr_extension, } } } #[derive(Clone, Serialize, Deserialize, Default, Debug, Eq, PartialEq)] pub struct AirMetrics { pub widths: AirWidths, pub constraints: usize, pub bus_interactions: usize, } impl From for AirStats { fn from(metrics: AirMetrics) -> Self { AirStats { main_columns: metrics.widths.main, constraints: metrics.constraints, bus_interactions: metrics.bus_interactions, } } } impl Add for AirMetrics { type Output = AirMetrics; fn add(self, rhs: AirMetrics) -> AirMetrics { AirMetrics { widths: self.widths + rhs.widths, constraints: self.constraints + rhs.constraints, bus_interactions: self.bus_interactions + rhs.bus_interactions, } } } impl Sum for AirMetrics { fn sum>(iter: I) -> AirMetrics { iter.fold(AirMetrics::default(), Add::add) } } impl AirMetrics { pub fn total_width(&self) -> usize { self.widths.total() } } impl CompiledProgram { // Return a tuple of (powdr AirMetrics, non-powdr AirMetrics) pub fn air_metrics( &self, max_degree: usize, ) -> (Vec<(AirMetrics, Option)>, Vec) { let air_inventory = self.vm_config.create_airs().unwrap(); let chip_complex = as VmBuilder< BabyBearPoseidon2Engine, >>::create_chip_complex( &SpecializedConfigCpuBuilder::default(), &self.vm_config, air_inventory, ) .unwrap(); let inventory = chip_complex.inventory; // Order of precompile is the same as that of Powdr executors in chip inventory let mut apc_stats = self .vm_config .powdr .precompiles .iter() .map(|precompile| precompile.apc_stats.clone()); inventory.airs().ext_airs().iter().fold( (Vec::new(), Vec::new()), |(mut powdr_air_metrics, mut non_powdr_air_metrics), air| { let name = air.name(); // We actually give name "powdr_air_for_opcode_" to the AIRs, // but OpenVM uses the actual Rust type (PowdrAir) as the name in this method. // TODO this is hacky but not sure how to do it better rn. 
if name.starts_with("PowdrAir") { powdr_air_metrics.push(( get_air_metrics(air.clone(), max_degree), Some(apc_stats.next().unwrap().widths), )); } else { non_powdr_air_metrics.push(get_air_metrics(air.clone(), max_degree)); } (powdr_air_metrics, non_powdr_air_metrics) }, ) } } pub fn execute( program: CompiledProgram, inputs: StdIn, ) -> Result<(), Box> { let CompiledProgram { exe, vm_config } = program; // Set app configuration let app_fri_params = FriParameters::standard_with_100_bits_conjectured_security(DEFAULT_APP_LOG_BLOWUP); let app_config = AppConfig::new(app_fri_params, vm_config.clone()); // prepare for execute #[cfg(feature = "cuda")] let sdk = PowdrSdkGpu::new(app_config).unwrap(); #[cfg(not(feature = "cuda"))] let sdk = PowdrSdkCpu::new(app_config).unwrap(); let output = sdk.execute(exe.clone(), inputs.clone()).unwrap(); tracing::info!("Public values output: {:?}", output); Ok(()) } // Generate execution profile for a guest program pub fn execution_profile_from_guest( program: &OriginalCompiledProgram, inputs: StdIn, ) -> ExecutionProfile { let OriginalCompiledProgram { exe, vm_config, .. 
} = program; let program = Prog::from(&exe.program); // Set app configuration let app_fri_params = FriParameters::standard_with_100_bits_conjectured_security(DEFAULT_APP_LOG_BLOWUP); let app_config = AppConfig::new(app_fri_params, vm_config.clone().config); // prepare for execute let sdk = PowdrExecutionProfileSdkCpu::::new(app_config).unwrap(); execution_profile::>(&program, || { sdk.execute_interpreted(exe.clone(), inputs.clone()) .unwrap(); }) } ================================================ FILE: openvm/src/powdr_extension/chip.rs ================================================ // Mostly taken from [this openvm extension](https://github.com/openvm-org/openvm/blob/1b76fd5a900a7d69850ee9173969f70ef79c4c76/extensions/rv32im/circuit/src/auipc/core.rs#L1) use std::{cell::RefCell, collections::BTreeMap, rc::Rc}; use crate::{ extraction_utils::{OriginalAirs, OriginalVmConfig}, isa::OpenVmISA, powdr_extension::{ executor::OriginalArenas, trace_generator::cpu::{PowdrPeripheryInstancesCpu, PowdrTraceGeneratorCpu}, PowdrPrecompile, }, }; use itertools::Itertools; use openvm_circuit::arch::MatrixRecordArena; use openvm_stark_backend::{ p3_air::{Air, BaseAir}, rap::ColumnsAir, }; use openvm_stark_backend::{ interaction::InteractionBuilder, p3_field::PrimeField32, p3_matrix::Matrix, rap::{BaseAirWithPublicValues, PartitionedBaseAir}, }; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::{ expression::{AlgebraicEvaluator, AlgebraicReference, WitnessEvaluator}, symbolic_machine::SymbolicMachine, }; pub struct PowdrChipCpu { pub name: String, pub record_arena_by_air_name: Rc>>>, pub trace_generator: PowdrTraceGeneratorCpu, } impl PowdrChipCpu { pub(crate) fn new( precompile: PowdrPrecompile, original_airs: OriginalAirs, base_config: OriginalVmConfig, periphery: PowdrPeripheryInstancesCpu, ) -> Self { let PowdrPrecompile { name, apc, apc_record_arena_cpu: apc_record_arena, .. 
} = precompile; let trace_generator = PowdrTraceGeneratorCpu::new(apc, original_airs, base_config, periphery); Self { name, record_arena_by_air_name: apc_record_arena, trace_generator, } } } pub struct PowdrAir { /// The columns in arbitrary order columns: Vec, machine: SymbolicMachine, } impl ColumnsAir for PowdrAir { fn columns(&self) -> Option> { Some(self.columns.iter().map(|c| (*c.name).clone()).collect()) } } impl PowdrAir { pub fn new(machine: SymbolicMachine) -> Self { Self { columns: machine.main_columns().collect(), machine, } } } impl BaseAir for PowdrAir { fn width(&self) -> usize { let res = self.columns.len(); assert!(res > 0); res } } // No public values, but the trait is implemented impl BaseAirWithPublicValues for PowdrAir {} impl Air for PowdrAir where AB::F: PrimeField32, { fn eval(&self, builder: &mut AB) { let main = builder.main(); let witnesses = main.row_slice(0); // TODO: cache? let witness_values: BTreeMap = self .columns .iter() .map(|c| c.id) .zip_eq(witnesses.iter().cloned()) .collect(); let witness_evaluator = WitnessEvaluator::new(&witness_values); for constraint in &self.machine.constraints { let constraint = witness_evaluator.eval_constraint(constraint); builder.assert_zero(constraint.expr); } for interaction in &self.machine.bus_interactions { let interaction = witness_evaluator.eval_bus_interaction(interaction); // TODO: is this correct? 
let count_weight = 1; builder.push_interaction( interaction.id as u16, interaction.args, interaction.mult, count_weight, ); } } } impl PartitionedBaseAir for PowdrAir {} #[cfg(feature = "cuda")] mod cuda { use std::{cell::RefCell, rc::Rc}; use openvm_circuit::arch::DenseRecordArena; use openvm_stark_sdk::p3_baby_bear::BabyBear; use crate::{ extraction_utils::{OriginalAirs, OriginalVmConfig}, isa::OpenVmISA, powdr_extension::{ executor::OriginalArenas, trace_generator::cuda::{PowdrPeripheryInstancesGpu, PowdrTraceGeneratorGpu}, PowdrPrecompile, }, }; pub struct PowdrChipGpu { pub name: String, pub record_arena_by_air_name: Rc>>, pub trace_generator: PowdrTraceGeneratorGpu, } impl PowdrChipGpu { pub(crate) fn new( precompile: PowdrPrecompile, original_airs: OriginalAirs, base_config: OriginalVmConfig, periphery: PowdrPeripheryInstancesGpu, ) -> Self { let PowdrPrecompile { name, apc, apc_record_arena_gpu: apc_record_arena, .. } = precompile; let trace_generator = PowdrTraceGeneratorGpu::new(apc, original_airs, base_config, periphery); Self { name, record_arena_by_air_name: apc_record_arena, trace_generator, } } } } #[cfg(feature = "cuda")] pub use cuda::*; ================================================ FILE: openvm/src/powdr_extension/executor/mod.rs ================================================ use std::{ borrow::{Borrow, BorrowMut}, cell::RefCell, collections::HashMap, rc::Rc, }; use crate::{ customize_exe::OpenVmExecutionState, extraction_utils::{ record_arena_dimension_by_air_name_per_apc_call, OriginalAirs, OriginalVmConfig, }, isa::{IsaApc, OpenVmISA}, }; use itertools::Itertools; use openvm_circuit::arch::InterpreterMeteredExecutor; use openvm_circuit::arch::{ execution_mode::{ExecutionCtx, MeteredCtx}, Arena, DenseRecordArena, E2PreCompute, InterpreterExecutor, MatrixRecordArena, PreflightExecutor, }; #[cfg(feature = "aot")] use openvm_circuit::arch::{AotExecutor, AotMeteredExecutor}; use openvm_circuit_derive::create_handler; use 
openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::instruction::Instruction; use openvm_stark_backend::p3_field::PrimeField32; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::{ execution::{OptimisticConstraintEvaluator, OptimisticConstraints}, InstructionHandler, }; use openvm_circuit::{ arch::{ ExecuteFunc, ExecutionCtxTrait, ExecutionError, ExecutorInventory, MeteredExecutionCtxTrait, StaticProgramError, VmExecState, VmExecutionConfig, VmStateMut, }, system::memory::online::{GuestMemory, TracingMemory}, }; /// A struct which holds the state of the execution based on the original instructions in this block and a dummy inventory. /// It holds arenas for each original use for both cpu and gpu execution, so that this struct can be agnostic to the execution backend. /// When using the cpu backend, only `original_arenas_cpu` is used, and vice versa for gpu execution. pub struct PowdrExecutor { pub air_by_opcode_id: OriginalAirs, pub executor_inventory: ExecutorInventory>, pub apc: IsaApc, pub original_arenas_cpu: Rc>>>, pub original_arenas_gpu: Rc>>, pub height_change: u32, cached_instructions_meta: Vec, } /// A shared mutable reference to the arenas used to store the traces of the original instructions, accessed during preflight execution and trace generation. /// The same reference is reused for all segments, under the assumption that segments are executed sequentially: preflight_0, tracegen_0, preflight_1, tracegen_1, ... 
/// It goes through the following cycle for each segment: /// - initialized at the beginning of preflight execution with the correct sizes for this segment /// - written to during preflight execution /// - read from during trace generation /// - reset to uninitialized after trace generation #[derive(Default)] pub enum OriginalArenas { #[default] Uninitialized, Initialized(InitializedOriginalArenas), } impl OriginalArenas { /// Given an estimate of how many times the APC is called in this segment, and the original airs and apc, /// initializes the arenas iff not already initialized. fn ensure_initialized( &mut self, apc_call_count_estimate: impl Fn() -> usize, original_airs: &OriginalAirs, apc: &IsaApc, ) -> &mut InitializedOriginalArenas { match self { OriginalArenas::Uninitialized => { *self = OriginalArenas::Initialized(InitializedOriginalArenas::new( apc_call_count_estimate(), original_airs, apc, )); match self { OriginalArenas::Initialized(i) => i, _ => unreachable!(), } } OriginalArenas::Initialized(i) => i, } } } /// A collection of arenas used to store the records of the original instructions, one per air name. /// Each arena is initialized with a capacity based on an estimate of how many times the APC is called in this segment /// and how many calls to each air are made per APC call. #[derive(Default)] pub struct InitializedOriginalArenas { arenas: Vec>>, air_name_to_arena_index: HashMap, pub number_of_calls: usize, } impl InitializedOriginalArenas { /// Creates a new instance of `InitializedOriginalArenas`. 
pub fn new( apc_call_count_estimate: usize, original_airs: &OriginalAirs, apc: &IsaApc, ) -> Self { let record_arena_dimensions = record_arena_dimension_by_air_name_per_apc_call(apc, original_airs); let (air_name_to_arena_index, arenas) = record_arena_dimensions.into_iter().enumerate().fold( (HashMap::new(), Vec::new()), |(mut air_name_to_arena_index, mut arenas), ( idx, ( air_name, RecordArenaDimension { real_height, width: air_width, dummy_height, }, ), )| { air_name_to_arena_index.insert(air_name, idx); arenas.push(Some(ArenaPair { real: A::with_capacity(real_height * apc_call_count_estimate, air_width), dummy: A::with_capacity(dummy_height * apc_call_count_estimate, air_width), })); (air_name_to_arena_index, arenas) }, ); Self { arenas, air_name_to_arena_index, // This is the actual number of calls, which we don't know yet. It will be updated during preflight execution. number_of_calls: 0, } } #[inline] fn arena_mut_by_index(&mut self, index: usize) -> &mut ArenaPair { self.arenas .get_mut(index) .and_then(|arena| arena.as_mut()) .expect("arena missing for index") } #[inline] fn real_arena_mut_by_index(&mut self, index: usize) -> &mut A { &mut self.arena_mut_by_index(index).real } #[inline] fn dummy_arena_mut_by_index(&mut self, index: usize) -> &mut A { &mut self.arena_mut_by_index(index).dummy } pub fn take_real_arena(&mut self, air_name: &str) -> Option { let index = *self.air_name_to_arena_index.get(air_name)?; self.arenas[index].take().map(|arena_pair| arena_pair.real) } } pub struct ArenaPair { pub real: A, pub dummy: A, } /// The dimensions of a record arena for a given air name, used to initialize the arenas. pub struct RecordArenaDimension { pub real_height: usize, pub width: usize, pub dummy_height: usize, } #[derive(Clone, Copy)] struct CachedInstructionMeta { executor_index: usize, arena_index: usize, should_use_real_arena: bool, } /// A struct to interpret the pre-compute data as for PowdrExecutor. 
#[derive(AlignedBytesBorrow, Clone)] #[repr(C)] struct PowdrPreCompute { height_change: u32, original_instructions: Vec<(ExecuteFunc, Vec)>, optimistic_constraints: OptimisticConstraints<(), u32>, } impl InterpreterExecutor for PowdrExecutor { fn pre_compute_size(&self) -> usize { // TODO: do we know `ExecutionCtx` is correct? It's only one implementation of `ExecutionCtxTrait`. // A clean fix would be to add `Ctx` as a generic parameter to this method in the `Executor` trait, but that would be a breaking change. size_of::>() } #[cfg(not(feature = "tco"))] fn pre_compute( &self, pc: u32, inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> where Ctx: ExecutionCtxTrait, { let pre_compute: &mut PowdrPreCompute = data.borrow_mut(); self.pre_compute_impl::(pc, inst, pre_compute)?; Ok(execute_e1_impl::) } #[cfg(feature = "tco")] fn handler( &self, pc: u32, inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> where Ctx: ExecutionCtxTrait, { let pre_compute: &mut PowdrPreCompute = data.borrow_mut(); self.pre_compute_impl::(pc, inst, pre_compute)?; Ok(execute_e1_handler::) } } impl InterpreterMeteredExecutor for PowdrExecutor { fn metered_pre_compute_size(&self) -> usize { // TODO: do we know `MeteredCtx` is correct? It's only one implementation of `MeteredExecutionCtxTrait`. // A clean fix would be to add `Ctx` as a generic parameter to this method in the `MeteredExecutor` trait, but that would be a breaking change. 
size_of::>>() } #[cfg(not(feature = "tco"))] fn metered_pre_compute( &self, chip_idx: usize, pc: u32, inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> where Ctx: MeteredExecutionCtxTrait, { let pre_compute: &mut E2PreCompute> = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; self.pre_compute_impl::(pc, inst, &mut pre_compute.data)?; Ok(execute_e2_impl::) } #[cfg(feature = "tco")] fn metered_handler( &self, chip_idx: usize, pc: u32, inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> where Ctx: MeteredExecutionCtxTrait, { let pre_compute: &mut E2PreCompute> = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; self.pre_compute_impl::(pc, inst, &mut pre_compute.data)?; Ok(execute_e2_handler::) } } #[cfg(feature = "aot")] impl AotExecutor for PowdrExecutor { fn is_aot_supported(&self, _inst: &Instruction) -> bool { false } fn generate_x86_asm( &self, _inst: &Instruction, _pc: u32, ) -> Result { std::unimplemented!() } } #[cfg(feature = "aot")] impl AotMeteredExecutor for PowdrExecutor { fn is_aot_metered_supported(&self, _inst: &Instruction) -> bool { false } fn generate_x86_metered_asm( &self, _inst: &Instruction, _pc: u32, _chip_idx: usize, _config: &openvm_circuit::arch::SystemConfig, ) -> Result { std::unimplemented!() } } impl PowdrExecutor { #[cfg(not(feature = "tco"))] /// The implementation of pre_compute, shared between Executor and MeteredExecutor. #[inline] fn pre_compute_impl( &self, pc: u32, inst: &Instruction, data: &mut PowdrPreCompute, ) -> Result<(), StaticProgramError> where Ctx: ExecutionCtxTrait, { use openvm_stark_backend::{p3_field::Field, p3_maybe_rayon::prelude::ParallelIterator}; let &Instruction { a, b, c, d, e, f, g, .. 
} = inst; // TODO: debug_assert that the opcode is the one we expect if !a.is_zero() || !b.is_zero() || !c.is_zero() || !d.is_zero() || !e.is_zero() || !f.is_zero() || !g.is_zero() { return Err(StaticProgramError::InvalidInstruction(pc)); } let executor_inventory = &self.executor_inventory; // Set the data using the original instructions let new_data = PowdrPreCompute { height_change: self.height_change, original_instructions: self .apc .block .par_instructions() .map(|(pc, instruction)| { let executor = executor_inventory .get_executor(instruction.inner.opcode) .ok_or(StaticProgramError::ExecutorNotFound { opcode: instruction.inner.opcode, })?; let pre_compute_size = executor.pre_compute_size(); let mut pre_compute_data = vec![0u8; pre_compute_size]; let execute_func = executor.pre_compute::( pc as u32, &instruction.inner, &mut pre_compute_data, )?; Ok((execute_func, pre_compute_data.to_vec())) }) .collect::, StaticProgramError>>()?, optimistic_constraints: self.apc.optimistic_constraints.clone(), }; unsafe { std::ptr::write(data, new_data); } Ok(()) } #[cfg(feature = "tco")] /// The implementation of pre_compute, shared between Executor and MeteredExecutor. #[inline] fn pre_compute_impl( &self, _pc: u32, _inst: &Instruction, _data: &mut PowdrPreCompute, ) -> Result<(), StaticProgramError> { unimplemented!("tco is not implemented yet") } } /// The implementation of the execute function, shared between Executor and MeteredExecutor. 
#[inline(always)] unsafe fn execute_e12_impl( pre_compute: &PowdrPreCompute, exec_state: &mut VmExecState, ) { let mut optimistic_constraint_evalutator = OptimisticConstraintEvaluator::new(); // Check the state before execution assert!(optimistic_constraint_evalutator .try_next_execution_step::>( &OpenVmExecutionState::from(&exec_state.vm_state), &pre_compute.optimistic_constraints ) .is_ok()); for (executor, data) in &pre_compute.original_instructions { executor(data.as_ptr(), exec_state); // Check the state after each original instruction assert!(optimistic_constraint_evalutator .try_next_execution_step::>( &OpenVmExecutionState::from(&exec_state.vm_state), &pre_compute.optimistic_constraints ) .is_ok()); } } #[create_handler] unsafe fn execute_e1_impl( pre_compute: *const u8, exec_state: &mut VmExecState, ) { let pre_compute: &PowdrPreCompute = std::slice::from_raw_parts(pre_compute, size_of::>()).borrow(); execute_e12_impl::(pre_compute, exec_state); } #[create_handler] unsafe fn execute_e2_impl( pre_compute: *const u8, exec_state: &mut VmExecState, ) { let pre_compute: &E2PreCompute> = std::slice::from_raw_parts( pre_compute, size_of::>>(), ) .borrow(); exec_state.ctx.on_height_change( pre_compute.chip_idx as usize, pre_compute.data.height_change, ); execute_e12_impl::(&pre_compute.data, exec_state); } // Preflight execution is implemented separately for CPU and GPU backends, because they use a different arena from `self` // TODO: reduce code duplication between the two implementations. The main issue now is we need to use the concrete arena types. 
impl PreflightExecutor> for PowdrExecutor { fn execute( &self, state: VmStateMut>, _: &Instruction, ) -> Result<(), ExecutionError> { // Extract the state components, since `execute` consumes the state but we need to pass it to each instruction execution let VmStateMut { pc, memory, streams, rng, custom_pvs, ctx, #[cfg(feature = "metrics")] metrics, } = state; // We use the arena for metrics, so this line ensures that the number of rows is reported correctly. // It does not allocate any extra memory, because the memory is already at initialization. #[cfg(feature = "metrics")] ctx.alloc_single_row(); // Initialize the original arenas if not already initialized let mut original_arenas = self.original_arenas_cpu.as_ref().borrow_mut(); // Recover an estimate of how many times the APC is called in this segment based on the current ctx height and width let apc_call_count = || ctx.trace_buffer.len() / ctx.width; let original_arenas = original_arenas.ensure_initialized(apc_call_count, &self.air_by_opcode_id, &self.apc); // execute the original instructions one by one for (instruction, cached_meta) in self .apc .instructions() .zip_eq(&self.cached_instructions_meta) { let executor = &self.executor_inventory.executors[cached_meta.executor_index]; let ctx_arena = if cached_meta.should_use_real_arena { original_arenas.real_arena_mut_by_index(cached_meta.arena_index) } else { original_arenas.dummy_arena_mut_by_index(cached_meta.arena_index) }; let state = VmStateMut { pc, memory, streams, rng, custom_pvs, // We execute in the context of the relevant original table ctx: ctx_arena, // TODO: should we pass around the same metrics object, or snapshot it at the beginning of this method and apply a single update at the end? 
#[cfg(feature = "metrics")] metrics, }; executor.execute(state, &instruction.inner)?; } // Update the real number of calls to the APC original_arenas.number_of_calls += 1; Ok(()) } fn get_opcode_name(&self, opcode: usize) -> String { format!("APC_{opcode}") } } // The GPU preflight executor implementation impl PreflightExecutor for PowdrExecutor { fn execute( &self, state: VmStateMut, _: &Instruction, ) -> Result<(), ExecutionError> { // Extract the state components, since `execute` consumes the state but we need to pass it to each instruction execution let VmStateMut { pc, memory, streams, rng, custom_pvs, ctx, #[cfg(feature = "metrics")] metrics, } = state; // Initialize the original arenas if not already initialized let mut original_arenas = self.original_arenas_gpu.as_ref().borrow_mut(); // Recover an (over)estimate of how many times the APC is called in this segment // Overestimate is fine because we can initailize dummy arenas with some extra space // Exact apc call count from execution is used in final tracegen regardless let apc_call_count = || { let apc_width = self.apc.machine().main_columns().count(); let bytes_per_row = apc_width * std::mem::size_of::(); let buf = ctx.records_buffer.get_ref(); buf.len() / bytes_per_row }; let original_arenas = original_arenas.ensure_initialized(apc_call_count, &self.air_by_opcode_id, &self.apc); // execute the original instructions one by one for (instruction, cached_meta) in self.apc.instructions().zip(&self.cached_instructions_meta) { let executor = &self.executor_inventory.executors[cached_meta.executor_index]; let ctx_arena = if cached_meta.should_use_real_arena { original_arenas.real_arena_mut_by_index(cached_meta.arena_index) } else { original_arenas.dummy_arena_mut_by_index(cached_meta.arena_index) }; let state = VmStateMut { pc, memory, streams, rng, custom_pvs, // We execute in the context of the relevant original table ctx: ctx_arena, // TODO: should we pass around the same metrics object, or snapshot it at 
the beginning of this method and apply a single update at the end? #[cfg(feature = "metrics")] metrics, }; executor.execute(state, &instruction.inner)?; } // Update the real number of calls to the APC original_arenas.number_of_calls += 1; Ok(()) } fn get_opcode_name(&self, opcode: usize) -> String { format!("APC_{opcode}") } } impl PowdrExecutor { pub fn new( air_by_opcode_id: OriginalAirs, base_config: OriginalVmConfig, apc: IsaApc, record_arena_by_air_name_cpu: Rc>>>, record_arena_by_air_name_gpu: Rc>>, height_change: u32, ) -> Self { let executor_inventory = base_config.config.create_executors().unwrap(); let arena_index_by_name = record_arena_dimension_by_air_name_per_apc_call(apc.as_ref(), &air_by_opcode_id) .iter() .enumerate() .map(|(idx, (name, _))| (name.clone(), idx)) .collect::>(); let cached_instructions_meta = apc .instructions() .zip_eq(apc.subs.iter()) .map(|(instruction, sub)| { let executor_index = *executor_inventory .instruction_lookup .get(&instruction.inner.opcode) .expect("missing executor for opcode") as usize; let (air_name, _) = air_by_opcode_id.get_instruction_air_and_id(instruction); let arena_index = *arena_index_by_name .get(&air_name) .expect("missing arena for air"); CachedInstructionMeta { executor_index, arena_index, should_use_real_arena: !sub.is_empty(), } }) .collect(); Self { air_by_opcode_id, executor_inventory, apc, original_arenas_cpu: record_arena_by_air_name_cpu, original_arenas_gpu: record_arena_by_air_name_gpu, height_change, cached_instructions_meta, } } } ================================================ FILE: openvm/src/powdr_extension/mod.rs ================================================ /// The core logic of our extension pub mod chip; /// The executor for the powdr instructions pub mod executor; /// The trace generator for the powdr instructions pub mod trace_generator; /// The opcodes for the powdr instructions, which is used in the chip implementation and contains the opcode ID pub mod opcode; /// The integration 
of our extension with the VM mod vm; pub use opcode::PowdrOpcode; pub use vm::{PowdrExtension, PowdrExtensionExecutor, PowdrPrecompile}; ================================================ FILE: openvm/src/powdr_extension/opcode.rs ================================================ use openvm_instructions::LocalOpcode; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)] pub struct PowdrOpcode { pub class_offset: usize, } impl LocalOpcode for PowdrOpcode { // This offset must not be accessed, since we want many opcodes of the same type to have different class_offsets. // This is because each opcode has its own air. const CLASS_OFFSET: usize = unreachable!(); fn from_usize(value: usize) -> Self { Self { class_offset: value, } } // The local offset is always 0, since we want to have many opcodes over the same air. fn local_usize(&self) -> usize { 0 } // The global opcode is based on `class_offset`, *NOT* on the static `CLASS_OFFSET`. 
fn global_opcode(&self) -> openvm_instructions::VmOpcode { openvm_instructions::VmOpcode::from_usize(self.class_offset) } } ================================================ FILE: openvm/src/powdr_extension/trace_generator/common.rs ================================================ use derive_more::From; use openvm_circuit::system::phantom::PhantomExecutor; use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor}; use openvm_circuit_primitives::Chip; use openvm_stark_backend::p3_field::PrimeField32; use crate::isa::OpenVmISA; #[allow(clippy::large_enum_variant)] #[derive(Chip, PreflightExecutor, Executor, MeteredExecutor, AnyEnum)] pub enum DummyExecutor { #[any_enum] Base(ISA::Executor), #[any_enum] Shared(SharedExecutor), } #[derive(Chip, PreflightExecutor, Executor, MeteredExecutor, From, AnyEnum)] pub enum SharedExecutor { Phantom(PhantomExecutor), } ================================================ FILE: openvm/src/powdr_extension/trace_generator/cpu/inventory.rs ================================================ use openvm_circuit::{ arch::{MatrixRecordArena, VmChipComplex}, system::SystemChipInventory, }; use openvm_stark_backend::{config::Val, prover::cpu::CpuBackend}; /// A dummy inventory used for execution of autoprecompiles /// It extends the `SdkVmConfigExecutor` and `SdkVmConfigPeriphery`, providing them with shared, pre-loaded periphery chips to avoid memory allocations by each SDK chip pub type DummyChipComplex = VmChipComplex>, CpuBackend, SystemChipInventory>; ================================================ FILE: openvm/src/powdr_extension/trace_generator/cpu/mod.rs ================================================ use std::{collections::HashMap, sync::Arc}; use itertools::Itertools; use openvm_circuit::{arch::MatrixRecordArena, utils::next_power_of_two_or_zero}; use openvm_stark_backend::{ p3_field::{Field, FieldAlgebra, PrimeField32}, p3_matrix::dense::{DenseMatrix, RowMajorMatrix}, prover::{hal::ProverBackend, 
types::AirProvingContext}, Chip, }; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::trace_handler::TraceTrait; use powdr_constraint_solver::constraint_system::ComputationMethod; use crate::{ extraction_utils::{OriginalAirs, OriginalVmConfig}, isa::IsaApc, isa::OpenVmISA, powdr_extension::{chip::PowdrChipCpu, executor::OriginalArenas}, }; /// The inventory of the PowdrExecutor, which contains the executors for each opcode. mod inventory; /// The shared periphery chips used by the PowdrTraceGenerator mod periphery; pub use inventory::DummyChipComplex; pub use periphery::{ PowdrPeripheryInstancesCpu, SharedPeripheryChipsCpu, SharedPeripheryChipsCpuProverExt, }; /// A wrapper around a DenseMatrix to implement `TraceTrait` which is required for `generate_trace`. pub struct SharedCpuTrace { pub matrix: Arc>, } impl TraceTrait for SharedCpuTrace { type Values = Vec; fn width(&self) -> usize { self.matrix.width } fn values(&self) -> &Self::Values { &self.matrix.values } } impl From>> for SharedCpuTrace { fn from(matrix: Arc>) -> Self { Self { matrix } } } impl>>, ISA: OpenVmISA> Chip for PowdrChipCpu { fn generate_proving_ctx(&self, _: R) -> AirProvingContext { tracing::trace!("Generating air proof input for PowdrChip {}", self.name); let trace = self .trace_generator .generate_witness(self.record_arena_by_air_name.take()); AirProvingContext::simple(Arc::new(trace), vec![]) } } pub struct PowdrTraceGeneratorCpu { pub apc: IsaApc, pub original_airs: OriginalAirs, pub config: OriginalVmConfig, pub periphery: PowdrPeripheryInstancesCpu, } impl PowdrTraceGeneratorCpu { pub fn new( apc: IsaApc, original_airs: OriginalAirs, config: OriginalVmConfig, periphery: PowdrPeripheryInstancesCpu, ) -> Self { Self { apc, original_airs, config, periphery, } } pub fn generate_witness( &self, original_arenas: OriginalArenas>, ) -> DenseMatrix { use powdr_autoprecompiles::trace_handler::{generate_trace, TraceData}; let width = 
self.apc.machine().main_columns().count(); let mut original_arenas = match original_arenas { OriginalArenas::Initialized(arenas) => arenas, OriginalArenas::Uninitialized => { // if the arenas are uninitialized, the apc was not called, so we return an empty trace return RowMajorMatrix::new(vec![], width); } }; let num_apc_calls = original_arenas.number_of_calls; let chip_inventory = { let airs = ISA::create_dummy_airs(self.config.config(), self.periphery.dummy.clone()) .expect("Failed to create dummy airs"); ISA::create_dummy_chip_complex_cpu( self.config.config(), airs, self.periphery.dummy.clone(), ) .expect("Failed to create chip complex") .inventory }; let dummy_trace_by_air_name: HashMap> = chip_inventory .chips() .iter() .enumerate() .rev() .filter_map(|(insertion_idx, chip)| { let air_name = chip_inventory.airs().ext_airs()[insertion_idx].name(); let record_arena = { match original_arenas.take_real_arena(&air_name) { Some(ra) => ra, None => return None, // skip this iteration, because we only have record arena for chips that are used } }; let shared_trace = chip.generate_proving_ctx(record_arena).common_main.unwrap(); Some((air_name, SharedCpuTrace::from(shared_trace))) }) .collect(); let TraceData { dummy_values, dummy_trace_index_to_apc_index_by_instruction, apc_poly_id_to_index, columns_to_compute, } = generate_trace( &dummy_trace_by_air_name, &self.original_airs, num_apc_calls, &self.apc, ); // allocate for apc trace let width = apc_poly_id_to_index.len(); let height = next_power_of_two_or_zero(num_apc_calls); let mut values = ::zero_vec(height * width); // go through the final table and fill in the values values // a record is `width` values // TODO: optimize by parallelizing on chunks of rows, currently fails because `dyn AnyChip>>` is not `Send` .chunks_mut(width) .zip(dummy_values) .for_each(|(row_slice, dummy_values)| { // map the dummy rows to the autoprecompile row use powdr_autoprecompiles::expression::MappingRowEvaluator; for (dummy_row, 
dummy_trace_index_to_apc_index) in dummy_values .iter() .map(|r| &r.data[r.start..r.start + r.length]) .zip_eq(&dummy_trace_index_to_apc_index_by_instruction) { for (dummy_trace_index, apc_index) in dummy_trace_index_to_apc_index { row_slice[*apc_index] = dummy_row[*dummy_trace_index]; } } // Fill in the columns we have to compute from other columns // (these are either new columns or for example the "is_valid" column). for derived_column in columns_to_compute { let col_index = apc_poly_id_to_index[&derived_column.variable.id]; row_slice[col_index] = match &derived_column.computation_method { ComputationMethod::Constant(c) => *c, ComputationMethod::QuotientOrZero(e1, e2) => { use powdr_number::ExpressionConvertible; let divisor_val = e2.to_expression(&|n| *n, &|column_ref| { row_slice[apc_poly_id_to_index[&column_ref.id]] }); if divisor_val.is_zero() { BabyBear::ZERO } else { divisor_val.inverse() * e1.to_expression(&|n| *n, &|column_ref| { row_slice[apc_poly_id_to_index[&column_ref.id]] }) } } }; } let evaluator = MappingRowEvaluator::new(row_slice, &apc_poly_id_to_index); // replay the side effects of this row on the main periphery self.apc .machine() .bus_interactions .iter() .for_each(|interaction| { use powdr_autoprecompiles::expression::{ AlgebraicEvaluator, ConcreteBusInteraction, }; let ConcreteBusInteraction { id, mult, args } = evaluator.eval_bus_interaction(interaction); self.periphery.real.apply( id as u16, mult.as_canonical_u32(), args.map(|arg| arg.as_canonical_u32()), &self.periphery.bus_ids, ); }); }); RowMajorMatrix::new(values, width) } } ================================================ FILE: openvm/src/powdr_extension/trace_generator/cpu/periphery.rs ================================================ use std::marker::PhantomData; use crate::powdr_extension::trace_generator::common::DummyExecutor; use openvm_circuit::arch::{ AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutorInventoryBuilder, ExecutorInventoryError, 
VmCircuitExtension, VmExecutionExtension, VmProverExtension, }; use openvm_circuit_primitives::{ bitwise_op_lookup::{ BitwiseOperationLookupAir, BitwiseOperationLookupChip, SharedBitwiseOperationLookupChip, }, range_tuple::{RangeTupleCheckerAir, RangeTupleCheckerChip, SharedRangeTupleCheckerChip}, var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerAir}, }; use openvm_stark_backend::{config::StarkGenericConfig, p3_field::PrimeField32}; use itertools::Itertools; use openvm_circuit::arch::RowMajorMatrixArena; use openvm_stark_backend::config::Val; use openvm_stark_backend::engine::StarkEngine; use openvm_stark_backend::prover::cpu::{CpuBackend, CpuDevice}; use crate::{isa::OpenVmISA, PeripheryBusIds}; /// The shared chips which can be used by the PowdrChip. #[derive(Clone)] pub struct PowdrPeripheryInstancesCpu { /// The real chips used for the main execution. pub real: SharedPeripheryChipsCpu, /// The dummy chips used for all APCs. They share the range checker but create new instances of the bitwise lookup chip and the tuple range checker. pub dummy: SharedPeripheryChipsCpu, /// The bus ids of the periphery pub bus_ids: PeripheryBusIds, } #[derive(Clone)] pub struct SharedPeripheryChipsCpu { pub bitwise_lookup_8: Option>, pub range_checker: SharedVariableRangeCheckerChip, pub tuple_range_checker: Option>, _marker: PhantomData, } impl PowdrPeripheryInstancesCpu { pub fn new( range_checker: SharedVariableRangeCheckerChip, bitwise_8: Option>, tuple_range_checker: Option>, bus_ids: PeripheryBusIds, ) -> Self { Self { real: SharedPeripheryChipsCpu { bitwise_lookup_8: bitwise_8.clone(), range_checker: range_checker.clone(), tuple_range_checker: tuple_range_checker.clone(), _marker: PhantomData, }, // Bitwise lookup and tuple range checker do not need to be shared with the main execution: // If we did share, we'd have to roll back the side effects of execution and apply the side effects from the apc air onto the main periphery. 
// By not sharing them, we can throw away the dummy ones after execution and only apply the side effects from the apc air onto the main periphery. dummy: SharedPeripheryChipsCpu { bitwise_lookup_8: bitwise_8.map(|bitwise_8| { SharedBitwiseOperationLookupChip::new(BitwiseOperationLookupChip::new( bitwise_8.bus(), )) }), range_checker: range_checker.clone(), tuple_range_checker: tuple_range_checker.map(|tuple_range_checker| { SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::new( *tuple_range_checker.bus(), )) }), _marker: PhantomData, }, bus_ids, } } } impl VmExecutionExtension for SharedPeripheryChipsCpu { type Executor = DummyExecutor; fn extend_execution( &self, _: &mut ExecutorInventoryBuilder, ) -> Result<(), ExecutorInventoryError> { // No executor to add for periphery chips Ok(()) } } impl VmCircuitExtension for SharedPeripheryChipsCpu { fn extend_circuit(&self, inventory: &mut AirInventory) -> Result<(), AirInventoryError> { // create dummy airs if let Some(bitwise_lookup_8) = &self.bitwise_lookup_8 { assert!(inventory .find_air::>() .next() .is_none()); inventory.add_air(BitwiseOperationLookupAir::<8>::new( bitwise_lookup_8.air.bus, )); } if let Some(tuple_range_checker) = &self.tuple_range_checker { assert!(inventory .find_air::>() .next() .is_none()); inventory.add_air(RangeTupleCheckerAir::<2> { bus: tuple_range_checker.air.bus, }); } // The range checker is already present in the builder because it's is used by the system, so we don't add it again. assert!(inventory .find_air::() .next() .is_some()); Ok(()) } } pub struct SharedPeripheryChipsCpuProverExt; // We implement an extension to make it easy to pre-load the shared chips into the VM inventory. // This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker, // BitwiseOperationLookupChip) are specific to CpuBackend. 
impl VmProverExtension> for SharedPeripheryChipsCpuProverExt where SC: StarkGenericConfig, E: StarkEngine, PD = CpuDevice>, RA: RowMajorMatrixArena>, Val: PrimeField32, { fn extend_prover( &self, extension: &SharedPeripheryChipsCpu, inventory: &mut ChipInventory>, ) -> Result<(), ChipInventoryError> { // Sanity check that the shared chips are not already present in the builder. if let Some(bitwise_lookup_8) = &extension.bitwise_lookup_8 { assert!(inventory .find_chip::>() .next() .is_none()); inventory.add_periphery_chip(bitwise_lookup_8.clone()); } if let Some(tuple_checker) = &extension.tuple_range_checker { assert!(inventory .find_chip::>() .next() .is_none()); inventory.add_periphery_chip(tuple_checker.clone()); } // The range checker is already present in the builder because it's is used by the system, so we don't add it again. assert!(inventory .find_chip::() .next() .is_some()); Ok(()) } } impl SharedPeripheryChipsCpu { /// Sends concrete values to the shared chips using a given bus id. /// Panics if the bus id doesn't match any of the chips' bus ids. 
    /// Replays one concrete bus interaction on the shared periphery chips.
    ///
    /// `bus_id` selects the target chip, `mult` is the multiplicity (the
    /// request is applied that many times) and `args` yields the interaction
    /// arguments in the order the corresponding AIR expects them.
    /// Panics (`unreachable!`) if `bus_id` matches no known bus.
    pub fn apply(
        &self,
        bus_id: u16,
        mult: u32,
        mut args: impl Iterator,
        periphery_bus_ids: &PeripheryBusIds,
    ) {
        match bus_id {
            id if Some(id) == periphery_bus_ids.bitwise_lookup => {
                // bitwise operation lookup
                // interpret the arguments, see `Air for BitwiseOperationLookupAir`
                let [x, y, x_xor_y, selector] = [
                    args.next().unwrap(),
                    args.next().unwrap(),
                    args.next().unwrap(),
                    args.next().unwrap(),
                ];
                for _ in 0..mult {
                    match selector {
                        0 => {
                            // selector 0: range-check request on (x, y)
                            self.bitwise_lookup_8.as_ref().unwrap().request_range(x, y);
                        }
                        1 => {
                            // selector 1: xor request; third argument must equal x ^ y
                            let res = self.bitwise_lookup_8.as_ref().unwrap().request_xor(x, y);
                            debug_assert_eq!(res, x_xor_y);
                        }
                        _ => {
                            unreachable!("Invalid selector");
                        }
                    }
                }
            }
            id if id == periphery_bus_ids.range_checker => {
                // interpret the arguments, see `Air for VariableRangeCheckerAir`
                let [value, max_bits] = [args.next().unwrap(), args.next().unwrap()];
                for _ in 0..mult {
                    self.range_checker.add_count(value, max_bits as usize);
                }
            }
            id if Some(id) == periphery_bus_ids.tuple_range_checker => {
                // tuple range checker
                // We pass a slice. It is checked inside `add_count`.
                let args = args.collect_vec();
                for _ in 0..mult {
                    self.tuple_range_checker.as_ref().unwrap().add_count(&args);
                }
            }
            0..=2 => {
                // execution bridge, memory, pc lookup
                // do nothing
            }
            _ => {
                unreachable!("Bus interaction {} not implemented", bus_id);
            }
        }
    }
}


================================================
FILE: openvm/src/powdr_extension/trace_generator/cuda/inventory.rs
================================================
use openvm_circuit::{
    arch::{DenseRecordArena, VmChipComplex},
    system::cuda::SystemChipInventoryGPU,
};
use openvm_cuda_backend::prover_backend::GpuBackend;

// Chip complex type used for the throw-away (dummy) GPU chips.
// NOTE(review): the generic arguments of `VmChipComplex` appear truncated in
// this extract — confirm against version control.
pub type GpuDummyChipComplex = VmChipComplex;


================================================
FILE: openvm/src/powdr_extension/trace_generator/cuda/mod.rs
================================================
use std::collections::{BTreeMap, HashMap};

use itertools::Itertools;
use openvm_circuit::{
    arch::{ChipInventory, DenseRecordArena},
    utils::next_power_of_two_or_zero,
};
use openvm_cuda_backend::base::DeviceMatrix;
use openvm_cuda_common::copy::MemCopyH2D;
use openvm_stark_backend::{
    p3_field::PrimeField32,
    prover::{hal::ProverBackend, types::AirProvingContext},
    Chip,
};
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_autoprecompiles::{
    expression::{AlgebraicExpression, AlgebraicReference},
    symbolic_machine::SymbolicBusInteraction,
};
use powdr_constraint_solver::constraint_system::{ComputationMethod, DerivedVariable};
use powdr_expression::{AlgebraicBinaryOperator, AlgebraicUnaryOperator};

use crate::{
    cuda_abi::{self, DerivedExprSpec, DevInteraction, ExprSpan, OpCode, OriginalAir, Subst},
    extraction_utils::{OriginalAirs, OriginalVmConfig},
    isa::{IsaApc, OpenVmISA},
    powdr_extension::{chip::PowdrChipGpu, executor::OriginalArenas},
    BabyBearSC, GpuBackend,
};

mod inventory;
mod periphery;

pub use inventory::GpuDummyChipComplex;
pub use periphery::{
    PowdrPeripheryInstancesGpu, SharedPeripheryChipsGpu, SharedPeripheryChipsGpuProverExt,
};

/// Encodes an algebraic expression into GPU
/// stack-machine bytecode.
///
/// Appends instructions to `bc` representing `expr` using the opcodes in `OpCode`.
/// References are encoded as `PushApc` with a column-major offset computed from
/// `id_to_apc_index` and `apc_height` (offset = apc_col_index * apc_height).
/// Constants are encoded as `PushConst` followed by the field element as `u32`.
/// Unary minus and binary operations map to `Neg`, `Add`, `Sub`, and `Mul`.
///
/// Note: This function does not track or enforce the evaluation stack depth,
/// which is done in device code.
fn emit_expr(
    bc: &mut Vec,
    expr: &AlgebraicExpression,
    id_to_apc_index: &BTreeMap,
    apc_height: usize,
) {
    match expr {
        AlgebraicExpression::Number(c) => {
            // Constant: opcode followed by the canonical field representation.
            bc.push(OpCode::PushConst as u32);
            bc.push(c.as_canonical_u32());
        }
        AlgebraicExpression::Reference(r) => {
            // Column reference: column-major offset into the APC trace.
            let idx = (id_to_apc_index[&r.id] * apc_height) as u32;
            bc.push(OpCode::PushApc as u32);
            bc.push(idx);
        }
        AlgebraicExpression::UnaryOperation(u) => {
            // Post-order emission: operand first, then the operator.
            emit_expr(bc, &u.expr, id_to_apc_index, apc_height);
            match u.op {
                AlgebraicUnaryOperator::Minus => bc.push(OpCode::Neg as u32),
            }
        }
        AlgebraicExpression::BinaryOperation(b) => {
            emit_expr(bc, &b.left, id_to_apc_index, apc_height);
            emit_expr(bc, &b.right, id_to_apc_index, apc_height);
            match b.op {
                AlgebraicBinaryOperator::Add => bc.push(OpCode::Add as u32),
                AlgebraicBinaryOperator::Sub => bc.push(OpCode::Sub as u32),
                AlgebraicBinaryOperator::Mul => bc.push(OpCode::Mul as u32),
            }
        }
    }
}

/// Given the current bytecode, appends bytecode for the expression `expr` and returns the associated span
fn emit_expr_span(
    bc: &mut Vec,
    expr: &AlgebraicExpression,
    id_to_apc_index: &BTreeMap,
    apc_height: usize,
) -> ExprSpan {
    // The span starts where the bytecode currently ends
    let off = bc.len() as u32;
    // Append the bytecode for `expr`
    emit_expr(bc, expr, id_to_apc_index, apc_height);
    // Calculate the length of the span
    let len = (bc.len() as u32) - off;
    ExprSpan { off, len }
}

/// Compile derived columns to GPU
/// order.
///
/// Returns one `DerivedExprSpec` per derived column (destination offset into
/// the APC trace plus a span into the shared bytecode buffer) together with
/// the bytecode buffer itself.
fn compile_derived_to_gpu(
    derived_columns: &[DerivedVariable<
        BabyBear,
        AlgebraicReference,
        AlgebraicExpression,
    >],
    apc_poly_id_to_index: &BTreeMap,
    apc_height: usize,
) -> (Vec, Vec) {
    let mut specs = Vec::with_capacity(derived_columns.len());
    let mut bytecode = Vec::new();
    for DerivedVariable {
        variable,
        computation_method,
    } in derived_columns
    {
        let apc_col_index = apc_poly_id_to_index[&variable.id];
        // Each spec's span starts at the current end of the bytecode buffer.
        let off = bytecode.len() as u32;
        match computation_method {
            ComputationMethod::Constant(c) => {
                // Encode constant as an expression
                bytecode.push(OpCode::PushConst as u32);
                bytecode.push(c.as_canonical_u32());
            }
            ComputationMethod::QuotientOrZero(e1, e2) => {
                // Invert denominator (or use zero), then multiply with numerator.
                emit_expr(&mut bytecode, e2, apc_poly_id_to_index, apc_height);
                bytecode.push(OpCode::InvOrZero as u32);
                emit_expr(&mut bytecode, e1, apc_poly_id_to_index, apc_height);
                bytecode.push(OpCode::Mul as u32);
            }
        }
        let len = (bytecode.len() as u32) - off;
        specs.push(DerivedExprSpec {
            // Column-major base offset of the destination column.
            col_base: (apc_col_index * apc_height) as u64,
            span: ExprSpan { off, len },
        });
    }
    (specs, bytecode)
}

/// Compiles the bus interactions to GPU bytecode. For each interaction, the
/// multiplicity expression is emitted as the first argument span, followed by
/// one span per bus argument; `args_index_off` indexes into the span array.
pub fn compile_bus_to_gpu(
    bus_interactions: &[SymbolicBusInteraction],
    apc_poly_id_to_index: &BTreeMap,
    apc_height: usize,
) -> (Vec, Vec, Vec) {
    let mut interactions = Vec::with_capacity(bus_interactions.len());
    let mut arg_spans = Vec::new();
    let mut bytecode = Vec::new();
    for bus_interaction in bus_interactions {
        // multiplicity as first arg span
        let args_index_off = arg_spans.len() as u32;
        let mult_span = emit_expr_span(
            &mut bytecode,
            &bus_interaction.mult,
            apc_poly_id_to_index,
            apc_height,
        );
        arg_spans.push(mult_span);
        // args
        for arg in &bus_interaction.args {
            let span = emit_expr_span(&mut bytecode, arg, apc_poly_id_to_index, apc_height);
            arg_spans.push(span);
        }
        interactions.push(DevInteraction {
            bus_id: (bus_interaction.id as u32),
            num_args: bus_interaction.args.len() as u32,
            args_index_off,
        });
    }
    (interactions, arg_spans, bytecode)
}

pub struct
PowdrTraceGeneratorGpu {
    /// The autoprecompile to generate a trace for.
    pub apc: IsaApc,
    /// AIRs of the original instructions the APC replaces.
    pub original_airs: OriginalAirs,
    /// The original (non-specialized) VM config.
    pub config: OriginalVmConfig,
    /// Shared periphery chip instances (real + dummy).
    pub periphery: PowdrPeripheryInstancesGpu,
}

impl PowdrTraceGeneratorGpu {
    /// Bundles everything needed to generate the APC trace on the GPU.
    pub fn new(
        apc: IsaApc,
        original_airs: OriginalAirs,
        config: OriginalVmConfig,
        periphery: PowdrPeripheryInstancesGpu,
    ) -> Self {
        Self {
            apc,
            original_airs,
            config,
            periphery,
        }
    }

    /// Generates the APC witness on the device, or `None` if the APC was
    /// never called during execution.
    ///
    /// Pipeline: run dummy chips to obtain per-AIR device traces, scatter
    /// their cells into the APC trace (`apc_tracegen`), evaluate derived
    /// columns, then replay bus interactions into the periphery histograms.
    fn try_generate_witness(
        &self,
        original_arenas: OriginalArenas,
    ) -> Option> {
        let mut original_arenas = match original_arenas {
            OriginalArenas::Initialized(arenas) => arenas,
            OriginalArenas::Uninitialized => {
                // if the arenas are uninitialized, the apc was not called, so we return early
                return None;
            }
        };
        let num_apc_calls = original_arenas.number_of_calls;
        // Build a throw-away chip complex whose chips regenerate the original
        // instruction traces from the recorded arenas.
        let chip_inventory: ChipInventory = {
            let airs = ISA::create_dummy_airs(self.config.config(), self.periphery.dummy.clone())
                .expect("Failed to create dummy airs");
            ISA::create_dummy_chip_complex_gpu(
                self.config.config(),
                airs,
                self.periphery.dummy.clone(),
            )
            .expect("Failed to create chip complex")
            .inventory
        };
        let dummy_trace_by_air_name: HashMap> = chip_inventory
            .chips()
            .iter()
            .enumerate()
            .rev()
            .filter_map(|(insertion_idx, chip)| {
                let air_name = chip_inventory.airs().ext_airs()[insertion_idx].name();
                let record_arena = {
                    match original_arenas.take_real_arena(&air_name) {
                        Some(ra) => ra,
                        None => return None, // skip this iteration, because we only have record arena for chips that are used
                    }
                };
                // We might have initialized an arena for an AIR which ends up having no real records. It gets filtered out here.
                chip.generate_proving_ctx(record_arena)
                    .common_main
                    .map(|m| (air_name, m))
            })
            .collect();

        // Map from apc poly id to its index in the final apc trace
        let apc_poly_id_to_index: BTreeMap = self
            .apc
            .machine
            .main_columns()
            .enumerate()
            .map(|(index, c)| (c.id, index))
            .collect();

        // allocate for apc trace
        let width = apc_poly_id_to_index.len();
        let height = next_power_of_two_or_zero(num_apc_calls);
        let mut output = DeviceMatrix::::with_capacity(height, width);

        // Prepare `OriginalAir` and `Subst` arrays
        let (airs, substitutions) = {
            self.apc
                // go through original instructions
                .instructions()
                // along with their substitutions
                .zip_eq(self.apc.subs())
                // map to `(air_name, substitutions)`
                .filter_map(|(instr, subs)| {
                    if subs.is_empty() {
                        None
                    } else {
                        Some((&self.original_airs.opcode_to_air[&instr.inner.opcode], subs))
                    }
                })
                // group by air name. This results in `HashMap>` where the length of the vector is the number of rows which are created in this air, per apc call
                .into_group_map()
                // go through each air and its substitutions
                .iter()
                .enumerate()
                .fold(
                    (Vec::new(), Vec::new()),
                    |(mut airs, mut substitutions), (air_index, (air_name, subs_by_row))| {
                        // Find the substitutions that map to an apc column
                        let new_substitutions: Vec = subs_by_row
                            .iter()
                            // enumerate over them to get the row index inside the air block
                            .enumerate()
                            .flat_map(|(row, subs)| {
                                // for each substitution, map to `Subst` struct
                                subs.iter()
                                    .map(move |sub| (row, sub))
                                    .map(|(row, sub)| Subst {
                                        air_index: air_index as i32,
                                        col: sub.original_poly_index as i32,
                                        row: row as i32,
                                        apc_col: apc_poly_id_to_index[&sub.apc_poly_id] as i32,
                                    })
                            })
                            .collect();
                        // get the device dummy trace for this air
                        let dummy_trace = &dummy_trace_by_air_name[*air_name];
                        use openvm_stark_backend::prover::hal::MatrixDimensions;
                        airs.push(OriginalAir {
                            width: dummy_trace.width() as i32,
                            height: dummy_trace.height() as i32,
                            buffer: dummy_trace.buffer().as_ptr(),
                            row_block_size: subs_by_row.len() as i32,
                        });
                        substitutions.extend(new_substitutions);
                        (airs, substitutions)
                    },
                )
        };

        // Send the airs and substitutions to device
        let airs = airs.to_device().unwrap();
        let substitutions = substitutions.to_device().unwrap();
        cuda_abi::apc_tracegen(&mut output, airs, substitutions, num_apc_calls).unwrap();

        // Apply derived columns using the GPU expression evaluator
        let (derived_specs, derived_bc) = compile_derived_to_gpu(
            &self.apc.machine.derived_columns,
            &apc_poly_id_to_index,
            height,
        );
        // In practice `d_specs` is never empty, because we will always have `is_valid`
        let d_specs = derived_specs.to_device().unwrap();
        let d_bc = derived_bc.to_device().unwrap();
        cuda_abi::apc_apply_derived_expr(&mut output, d_specs, d_bc, num_apc_calls).unwrap();

        // Encode bus interactions for GPU consumption
        let (bus_interactions, arg_spans, bytecode) = compile_bus_to_gpu(
            &self.apc.machine.bus_interactions,
            &apc_poly_id_to_index,
            height,
        );
        let bus_interactions = bus_interactions.to_device().unwrap();
        let arg_spans = arg_spans.to_device().unwrap();
        let bytecode = bytecode.to_device().unwrap();

        // Gather GPU inputs for periphery (bus ids, count device buffers)
        let periphery = &self.periphery.real;
        // Range checker
        let var_range_bus_id = self.periphery.bus_ids.range_checker as u32;
        let var_range_count = &periphery.range_checker.count;
        // Tuple checker
        let tuple_range_checker_chip = periphery.tuple_range_checker.as_ref().unwrap();
        let tuple2_bus_id = self.periphery.bus_ids.tuple_range_checker.unwrap() as u32;
        let tuple2_sizes = tuple_range_checker_chip.sizes;
        let tuple2_count_u32 = tuple_range_checker_chip.count.as_ref();
        // Bitwise lookup; NUM_BITS is fixed at 8 in CUDA
        let bitwise_bus_id = self.periphery.bus_ids.bitwise_lookup.unwrap() as u32;
        let bitwise_count_u32 = periphery.bitwise_lookup_8.as_ref().unwrap().count.as_ref();

        // Launch GPU apply-bus to update periphery histograms on device
        // Note that this is implicitly serialized after `apc_tracegen`,
        // because we use the default host to device stream, which only launches
        // the next kernel function after the prior (`apc_tracegen`) returns.
        // This is important because bus evaluation depends on trace results.
        cuda_abi::apc_apply_bus(
            // APC related
            &output,
            num_apc_calls,
            // Interaction related
            bytecode,
            bus_interactions,
            arg_spans,
            // Variable range checker related
            var_range_bus_id,
            var_range_count,
            // Tuple range checker related
            tuple2_bus_id,
            tuple2_count_u32,
            tuple2_sizes,
            // Bitwise related
            bitwise_bus_id,
            bitwise_count_u32,
        )
        .unwrap();
        Some(output)
    }
}

// NOTE(review): generic-parameter lists appear truncated in this extract
// (`impl>, ISA: OpenVmISA>`); confirm against version control.
impl>, ISA: OpenVmISA> Chip for PowdrChipGpu {
    /// Generates the proving context for the APC chip. The record arena
    /// argument is unused: the APC consumes the per-AIR arenas captured
    /// during execution instead.
    fn generate_proving_ctx(&self, _: R) -> AirProvingContext {
        tracing::trace!("Generating air proof input for PowdrChip {}", self.name);
        let trace = self
            .trace_generator
            .try_generate_witness(self.record_arena_by_air_name.take());
        AirProvingContext::new(vec![], trace, vec![])
    }
}


================================================
FILE: openvm/src/powdr_extension/trace_generator/cuda/periphery.rs
================================================
use openvm_circuit::arch::{
    AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, DenseRecordArena,
    ExecutorInventoryBuilder, ExecutorInventoryError, VmCircuitExtension, VmExecutionExtension,
    VmProverExtension,
};
use openvm_circuit_primitives::{
    bitwise_op_lookup::{
        BitwiseOperationLookupAir, BitwiseOperationLookupChip, BitwiseOperationLookupChipGPU,
    },
    range_tuple::{RangeTupleCheckerAir, RangeTupleCheckerChip, RangeTupleCheckerChipGPU},
    var_range::{VariableRangeCheckerAir, VariableRangeCheckerChipGPU},
};
use openvm_cuda_backend::engine::GpuBabyBearPoseidon2Engine;
use openvm_cuda_backend::prover_backend::GpuBackend;
use openvm_stark_backend::{config::StarkGenericConfig, p3_field::PrimeField32};

use crate::{
    isa::OpenVmISA, powdr_extension::trace_generator::common::DummyExecutor, BabyBearSC,
    PeripheryBusIds,
};
use std::{marker::PhantomData, sync::Arc};

/// The shared chips which can be used by the PowdrChipGpu.
#[derive(Clone)]
pub struct PowdrPeripheryInstancesGpu {
    /// The real chips used for the main execution.
    pub real: SharedPeripheryChipsGpu,
    /// The dummy chips used for all APCs. They share the range checker but create new instances of the bitwise lookup chip and the tuple range checker.
    pub dummy: SharedPeripheryChipsGpu,
    /// The bus ids of the periphery
    pub bus_ids: PeripheryBusIds,
}

// NOTE(review): generic-parameter lists in this file appear truncated in the
// extract (e.g. `Option>>`); confirm against version control.
#[derive(Clone)]
pub struct SharedPeripheryChipsGpu {
    // Optional: only present when the base config uses a bitwise lookup.
    pub bitwise_lookup_8: Option>>,
    // Always present: the system range checker.
    pub range_checker: std::sync::Arc,
    // Optional: only present when the base config uses a tuple range checker.
    pub tuple_range_checker: Option>>,
    _marker: PhantomData,
}

impl PowdrPeripheryInstancesGpu {
    /// Builds the `real` chip set (shared with the main execution) and a
    /// `dummy` chip set for APC trace generation. The dummy set shares the
    /// range checker but owns fresh bitwise-lookup / tuple-range-checker
    /// instances so that APC runs don't pollute the real histograms.
    pub fn new(
        range_checker: Arc,
        bitwise_8: Option>>,
        tuple_range_checker: Option>>,
        bus_ids: PeripheryBusIds,
    ) -> Self {
        Self {
            real: SharedPeripheryChipsGpu {
                bitwise_lookup_8: bitwise_8.clone(),
                range_checker: range_checker.clone(),
                tuple_range_checker: tuple_range_checker.clone(),
                _marker: PhantomData,
            },
            dummy: SharedPeripheryChipsGpu {
                // BitwiseLookupChipGPU is always initialized via `hybrid()` with a CPU chip in all available extensions of `SdkVmGpuBuilder::create_chip_complex()`.
                // In case this changes in the future, `cpu_chip.unwrap()` will panic, and we can fix the code.
                bitwise_lookup_8: bitwise_8.map(|bitwise_8| {
                    Arc::new(BitwiseOperationLookupChipGPU::hybrid(Arc::new(
                        BitwiseOperationLookupChip::new(
                            bitwise_8.as_ref().cpu_chip.as_ref().unwrap().bus(),
                        ),
                    )))
                }),
                range_checker: range_checker.clone(),
                // RangeTupleCheckerGPU is always initialized via `new()` without a CPU chip in all available extensions of `SdkVmGpuBuilder::create_chip_complex()`.
                // In case this changes in the future the `Some` matching arm below will catch it.
                tuple_range_checker: tuple_range_checker.map(|tuple_range_checker| {
                    Arc::new({
                        match tuple_range_checker.cpu_chip.as_ref() {
                            // None is the expected case
                            None => RangeTupleCheckerChipGPU::new(tuple_range_checker.sizes),
                            Some(cpu_chip) => RangeTupleCheckerChipGPU::hybrid(Arc::new(
                                RangeTupleCheckerChip::new(*cpu_chip.bus()),
                            )),
                        }
                    })
                }),
                _marker: PhantomData,
            },
            bus_ids,
        }
    }
}

impl VmExecutionExtension for SharedPeripheryChipsGpu {
    type Executor = DummyExecutor;

    /// Periphery chips contribute no executors.
    fn extend_execution(
        &self,
        _: &mut ExecutorInventoryBuilder,
    ) -> Result<(), ExecutorInventoryError> {
        // No executor to add for periphery chips
        Ok(())
    }
}

impl VmCircuitExtension for SharedPeripheryChipsGpu {
    /// Registers dummy AIRs mirroring the optional periphery chips, asserting
    /// none was registered before. The variable range checker AIR must
    /// already exist exactly once (added by the system).
    fn extend_circuit(&self, inventory: &mut AirInventory) -> Result<(), AirInventoryError> {
        // create dummy airs
        if let Some(bitwise_lookup_8) = &self.bitwise_lookup_8 {
            assert!(inventory
                .find_air::>()
                .next()
                .is_none());
            inventory.add_air(BitwiseOperationLookupAir::<8>::new(
                bitwise_lookup_8.cpu_chip.as_ref().unwrap().bus(),
            ));
        }
        if let Some(tuple_range_checker) = &self.tuple_range_checker {
            use openvm_circuit_primitives::range_tuple::RangeTupleCheckerBus;

            use crate::bus_map::DEFAULT_TUPLE_RANGE_CHECKER;
            assert!(inventory
                .find_air::>()
                .next()
                .is_none());
            // RangeTupleCheckerGPU is always initialized via `new()` without a CPU chip in all available extensions of `SdkVmGpuBuilder::create_chip_complex()`.
            // Therefore we create a new bus index, following a similar scenario in `Rv32M::extend_circuit`.
            // The bus id is hardcoded to the default and isn't guaranteed to be correct, because it depends on chip insertion order,
            // but this won't matter because the dummy chips are thrown away anyway.
            let bus = match tuple_range_checker.cpu_chip.as_ref() {
                // None is the expected case
                None => RangeTupleCheckerBus::new(
                    DEFAULT_TUPLE_RANGE_CHECKER as u16,
                    tuple_range_checker.sizes,
                ),
                Some(cpu_chip) => *cpu_chip.bus(),
            };
            inventory.add_air(RangeTupleCheckerAir::<2> { bus });
        }
        // The range checker is already present in the builder because it is used by the system, so we don't add it again.
        assert!(inventory
            .find_air::()
            .nth(1)
            .is_none());
        Ok(())
    }
}

pub struct SharedPeripheryChipsGpuProverExt;

impl VmProverExtension>
    for SharedPeripheryChipsGpuProverExt
{
    /// GPU counterpart of the CPU prover extension: registers the optional
    /// shared chips with the inventory and sanity-checks the range checker.
    fn extend_prover(
        &self,
        extension: &SharedPeripheryChipsGpu,
        inventory: &mut ChipInventory,
    ) -> Result<(), ChipInventoryError> {
        // Sanity check that the shared chips are not already present in the builder.
        if let Some(bitwise_lookup_8) = &extension.bitwise_lookup_8 {
            assert!(inventory
                .find_chip::>>()
                .next()
                .is_none());
            inventory.add_periphery_chip(bitwise_lookup_8.clone());
        }
        if let Some(tuple_checker) = &extension.tuple_range_checker {
            assert!(inventory
                .find_chip::>>()
                .next()
                .is_none());
            inventory.add_periphery_chip(tuple_checker.clone());
        }
        // The range checker is already present in the builder because it is used by the system, so we don't add it again.
        assert!(inventory
            .find_chip::>()
            .next()
            .is_some());
        Ok(())
    }
}


================================================
FILE: openvm/src/powdr_extension/trace_generator/mod.rs
================================================
pub mod cpu;
#[cfg(feature = "cuda")]
pub mod cuda;

mod common;

pub use cpu::{DummyChipComplex, SharedPeripheryChipsCpu};
#[cfg(feature = "cuda")]
pub use cuda::{GpuDummyChipComplex, SharedPeripheryChipsGpu};


================================================
FILE: openvm/src/powdr_extension/vm.rs
================================================
// Mostly taken from [this openvm extension](https://github.com/openvm-org/openvm/blob/1b76fd5a900a7d69850ee9173969f70ef79c4c76/extensions/rv32im/circuit/src/extension.rs#L185) and simplified to only handle a single opcode with its necessary dependencies

use std::cell::RefCell;
use std::iter::once;
use std::rc::Rc;

use derive_more::From;
use openvm_circuit::arch::{DenseRecordArena, MatrixRecordArena};
#[cfg(not(feature = "tco"))]
use openvm_instructions::instruction::Instruction;
use openvm_instructions::LocalOpcode;
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_openvm_bus_interaction_handler::bus_map::BusMap;

use crate::customize_exe::OvmApcStats;
use crate::extraction_utils::{OriginalAirs, OriginalVmConfig};
use crate::isa::{IsaApc, OpenVmISA};
use crate::powdr_extension::chip::PowdrAir;
use crate::powdr_extension::executor::{OriginalArenas, PowdrExecutor};
use crate::powdr_extension::PowdrOpcode;
use openvm_circuit::{
    arch::{AirInventory, AirInventoryError, VmCircuitExtension, VmExecutionExtension},
    circuit_derive::Chip,
};
use openvm_stark_backend::{
    config::{StarkGenericConfig, Val},
    p3_field::{Field, PrimeField32},
};
use serde::{Deserialize, Serialize};

/// The powdr VM extension: one precompile per generated APC.
#[derive(Clone, Deserialize, Serialize)]
#[serde(bound = "F: Field")]
pub struct PowdrExtension {
    pub precompiles: Vec>,
    pub base_config: OriginalVmConfig,
    pub bus_map: BusMap,
    pub airs: OriginalAirs,
}

#[derive(Clone, Serialize, Deserialize)]
#[serde(bound = "F: Field")]
pub struct PowdrPrecompile {
    // Human-readable name of the precompile.
    pub name: String,
    // The opcode assigned to this precompile.
    pub opcode: PowdrOpcode,
    // The autoprecompile itself.
    pub apc: IsaApc,
    // Statistics gathered while building the APC.
    pub apc_stats: OvmApcStats,
    // Record arenas are runtime-only state and are not serialized.
    #[serde(skip)]
    pub apc_record_arena_cpu: Rc>>>,
    #[serde(skip)]
    pub apc_record_arena_gpu: Rc>>,
}

impl PowdrPrecompile {
    /// Creates a precompile with freshly initialized (empty) record arenas.
    pub fn new(
        name: String,
        opcode: PowdrOpcode,
        apc: IsaApc,
        apc_stats: OvmApcStats,
    ) -> Self {
        Self {
            name,
            opcode,
            apc,
            apc_stats,
            // Initialize with empty Rc (default to OriginalArenas::Uninitialized) for each APC
            apc_record_arena_cpu: Default::default(),
            apc_record_arena_gpu: Default::default(),
        }
    }
}

impl PowdrExtension {
    /// Bundles the precompiles with the base config, bus map and original AIRs.
    pub fn new(
        precompiles: Vec>,
        base_config: OriginalVmConfig,
        bus_map: BusMap,
        airs: OriginalAirs,
    ) -> Self {
        Self {
            precompiles,
            base_config,
            bus_map,
            airs,
        }
    }
}

// Single-variant executor enum required by the openvm extension machinery.
#[derive(From, Chip)]
#[allow(clippy::large_enum_variant)]
pub enum PowdrExtensionExecutor {
    Powdr(PowdrExecutor),
}

impl VmExecutionExtension for PowdrExtension {
    type Executor = PowdrExtensionExecutor;

    /// Registers one `PowdrExecutor` per precompile under its global opcode.
    fn extend_execution(
        &self,
        inventory: &mut openvm_circuit::arch::ExecutorInventoryBuilder,
    ) -> Result<(), openvm_circuit::arch::ExecutorInventoryError> {
        for precompile in &self.precompiles {
            // The apc chip uses a single row per call
            let height_change = 1;
            let powdr_executor = PowdrExtensionExecutor::Powdr(PowdrExecutor::new(
                self.airs.clone(),
                self.base_config.clone(),
                precompile.apc.clone(),
                precompile.apc_record_arena_cpu.clone(),
                precompile.apc_record_arena_gpu.clone(),
                height_change,
            ));
            inventory.add_executor(powdr_executor, once(precompile.opcode.global_opcode()))?;
        }
        Ok(())
    }
}

impl VmCircuitExtension for PowdrExtension, ISA>
where
    SC: StarkGenericConfig,
    Val: PrimeField32,
{
    /// Registers one `PowdrAir` per precompile.
    fn extend_circuit(&self, inventory: &mut AirInventory) -> Result<(), AirInventoryError> {
        for precompile in &self.precompiles {
            inventory.add_air(PowdrAir::new(precompile.apc.machine.clone()));
        }
        Ok(())
    }
}

// We cannot derive the implementations below due to limitations in the openvm derives
impl openvm_circuit::arch::AnyEnum for
PowdrExtensionExecutor {
    // Downcast support: expose the inner executor as `Any`.
    fn as_any_kind(&self) -> &dyn std::any::Any {
        match self {
            Self::Powdr(x) => x,
        }
    }

    fn as_any_kind_mut(&mut self) -> &mut dyn std::any::Any {
        match self {
            Self::Powdr(x) => x,
        }
    }
}

// All trait impls below simply delegate to the single `Powdr` variant.
impl openvm_circuit::arch::InterpreterExecutor for PowdrExtensionExecutor {
    fn pre_compute_size(&self) -> usize {
        match self {
            Self::Powdr(x) => x.pre_compute_size(),
        }
    }

    #[cfg(not(feature = "tco"))]
    fn pre_compute(
        &self,
        pc: u32,
        inst: &Instruction,
        data: &mut [u8],
    ) -> Result<
        openvm_circuit::arch::ExecuteFunc,
        openvm_circuit::arch::StaticProgramError,
    >
    where
        Ctx: openvm_circuit::arch::execution_mode::ExecutionCtxTrait,
    {
        match self {
            Self::Powdr(x) => x.pre_compute(pc, inst, data),
        }
    }

    #[cfg(feature = "tco")]
    fn handler(
        &self,
        pc: u32,
        inst: &Instruction,
        data: &mut [u8],
    ) -> Result<
        openvm_circuit::arch::Handler,
        openvm_circuit::arch::StaticProgramError,
    >
    where
        Ctx: openvm_circuit::arch::execution_mode::ExecutionCtxTrait,
    {
        match self {
            Self::Powdr(x) => x.handler(pc, inst, data),
        }
    }
}

impl openvm_circuit::arch::InterpreterMeteredExecutor for PowdrExtensionExecutor {
    fn metered_pre_compute_size(&self) -> usize {
        match self {
            Self::Powdr(x) => x.metered_pre_compute_size(),
        }
    }

    #[cfg(not(feature = "tco"))]
    fn metered_pre_compute(
        &self,
        chip_idx: usize,
        pc: u32,
        inst: &Instruction,
        data: &mut [u8],
    ) -> Result<
        openvm_circuit::arch::ExecuteFunc,
        openvm_circuit::arch::StaticProgramError,
    >
    where
        Ctx: openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait,
    {
        match self {
            Self::Powdr(x) => x.metered_pre_compute(chip_idx, pc, inst, data),
        }
    }

    #[cfg(feature = "tco")]
    fn metered_handler(
        &self,
        chip_idx: usize,
        pc: u32,
        inst: &Instruction,
        data: &mut [u8],
    ) -> Result<
        openvm_circuit::arch::Handler,
        openvm_circuit::arch::StaticProgramError,
    >
    where
        Ctx: openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait,
    {
        match self {
            Self::Powdr(x) => x.metered_handler(chip_idx, pc, inst, data),
        }
    }
}

#[cfg(feature = "aot")]
impl openvm_circuit::arch::AotExecutor for PowdrExtensionExecutor
where
    PowdrExecutor: openvm_circuit::arch::AotExecutor,
{
    fn is_aot_supported(&self, inst: &Instruction) -> bool {
        match self {
            Self::Powdr(x) => x.is_aot_supported(inst),
        }
    }

    fn generate_x86_asm(
        &self,
        inst: &Instruction,
        pc: u32,
    ) -> Result {
        match self {
            Self::Powdr(x) => x.generate_x86_asm(inst, pc),
        }
    }
}

#[cfg(feature = "aot")]
impl openvm_circuit::arch::AotMeteredExecutor for PowdrExtensionExecutor
where
    PowdrExecutor: openvm_circuit::arch::AotMeteredExecutor,
{
    fn is_aot_metered_supported(&self, inst: &Instruction) -> bool {
        match self {
            Self::Powdr(x) => x.is_aot_metered_supported(inst),
        }
    }

    fn generate_x86_metered_asm(
        &self,
        inst: &Instruction,
        pc: u32,
        chip_idx: usize,
        config: &openvm_circuit::arch::SystemConfig,
    ) -> Result {
        match self {
            Self::Powdr(x) => x.generate_x86_metered_asm(inst, pc, chip_idx, config),
        }
    }
}

impl openvm_circuit::arch::PreflightExecutor for PowdrExtensionExecutor
where
    PowdrExecutor: openvm_circuit::arch::PreflightExecutor,
{
    fn execute(
        &self,
        state: openvm_circuit::arch::VmStateMut<
            BabyBear,
            openvm_circuit::system::memory::online::TracingMemory,
            RA,
        >,
        instruction: &Instruction,
    ) -> Result<(), openvm_circuit::arch::ExecutionError> {
        match self {
            Self::Powdr(x) => x.execute(state, instruction),
        }
    }

    fn get_opcode_name(&self, opcode: usize) -> String {
        match self {
            Self::Powdr(x) => as openvm_circuit::arch::PreflightExecutor<
                BabyBear,
                RA,
            >>::get_opcode_name(x, opcode),
        }
    }
}


================================================
FILE: openvm/src/program.rs
================================================
use std::sync::Arc;

use openvm_instructions::exe::VmExe;
use openvm_instructions::program::Program as OpenVmProgram;
use openvm_stark_backend::p3_field::PrimeField32;
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_autoprecompiles::blocks::{collect_basic_blocks, BasicBlock, Program};
use powdr_autoprecompiles::DegreeBound;
use serde::{Deserialize, Serialize};
use crate::customize_exe::Instr;
use crate::extraction_utils::OriginalVmConfig;
use crate::isa::OpenVmISA;
use crate::{BabyBearOpenVmApcAdapter, SpecializedConfig};

/// An executable together with the specialized (APC-extended) VM config.
#[derive(Serialize, Deserialize, Clone)]
#[serde(bound = "")]
pub struct CompiledProgram {
    pub exe: Arc>,
    pub vm_config: SpecializedConfig,
}

// the original openvm program and config without powdr extension, along with the elf
pub struct OriginalCompiledProgram<'a, ISA: OpenVmISA> {
    pub exe: Arc>,
    pub vm_config: OriginalVmConfig,
    pub linked_program: ISA::LinkedProgram<'a>,
}

impl<'a, ISA: OpenVmISA> OriginalCompiledProgram<'a, ISA> {
    pub fn new(
        exe: Arc>,
        vm_config: OriginalVmConfig,
        linked_program: ISA::LinkedProgram<'a>,
    ) -> Self {
        Self {
            exe,
            vm_config,
            linked_program,
        }
    }

    /// Segments the program into basic blocks
    pub fn collect_basic_blocks(&self) -> Vec>> {
        // Jump destinations are determined by the ISA from the linked program.
        let jumpdest_set = ISA::get_jump_destinations(self);
        let program = Prog::from(&self.exe.program);
        collect_basic_blocks::>(&program, &jumpdest_set)
    }

    /// Converts to a `CompiledProgram` with the original vm config (without autoprecompiles).
    pub fn compiled_program(&self, degree_bound: DegreeBound) -> CompiledProgram {
        CompiledProgram {
            exe: self.exe.clone(),
            vm_config: SpecializedConfig::new(self.vm_config.clone(), Vec::new(), degree_bound),
        }
    }
}

/// A newtype wrapper around `OpenVmProgram` to implement the `Program` trait.
/// This is necessary because we cannot implement a foreign trait for a foreign type.
// NOTE(review): the generic argument of `OpenVmProgram` appears truncated in
// this extract — confirm against version control.
pub struct Prog<'a, F>(&'a OpenVmProgram);

impl<'a, F> From<&'a OpenVmProgram> for Prog<'a, F> {
    fn from(program: &'a OpenVmProgram) -> Self {
        Prog(program)
    }
}

impl<'a, F: PrimeField32, ISA: OpenVmISA> Program> for Prog<'a, F> {
    fn base_pc(&self) -> u64 {
        self.0.pc_base as u64
    }

    fn instructions(&self) -> Box> + '_> {
        // Debug info entries may be `None`; those slots carry no instruction.
        Box::new(
            self.0
                .instructions_and_debug_infos
                .iter()
                .filter_map(|x| x.as_ref().map(|i| Instr::from(i.0.clone()))),
        )
    }

    fn length(&self) -> u32 {
        self.0.instructions_and_debug_infos.len() as u32
    }
}


================================================
FILE: openvm/src/test_utils.rs
================================================
use itertools::Itertools;
use openvm_instructions::instruction::Instruction;
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_autoprecompiles::blocks::SuperBlock;
use powdr_autoprecompiles::empirical_constraints::EmpiricalConstraints;
use powdr_autoprecompiles::evaluation::evaluate_apc;
use powdr_autoprecompiles::export::ExportOptions;
use powdr_autoprecompiles::{build, VmConfig};
use powdr_number::BabyBearField;
use std::fs;
use std::path::Path;

use crate::extraction_utils::OriginalVmConfig;
use crate::isa::OpenVmISA;
use crate::{BabyBearOpenVmApcAdapter, Instr, DEFAULT_DEGREE_BOUND};
use powdr_openvm_bus_interaction_handler::OpenVmBusInteractionHandler;

/// Compile a superblock into an APC snapshot string.
///
/// This builds the APC, evaluates it, and returns a formatted string containing
/// the instructions, evaluation stats, and machine rendering.
pub fn compile_apc(
    original_config: &OriginalVmConfig,
    superblock: SuperBlock>,
) -> String {
    let degree_bound = DEFAULT_DEGREE_BOUND;
    let airs = original_config.airs(degree_bound).unwrap();
    let bus_map = original_config.bus_map();
    let vm_config = VmConfig {
        instruction_handler: &airs,
        bus_interaction_handler: OpenVmBusInteractionHandler::::default(),
        bus_map: bus_map.clone(),
    };
    let superblock = superblock.map_instructions(Instr::::from);

    // for aligning the output
    let max_pc_digits = superblock.pcs().max().unwrap().max(1).ilog10() as usize + 1;
    let superblock_str = superblock
        .instructions()
        .map(|(pc, inst)| format!("  {pc:>max_pc_digits$}: {}", ISA::format(&inst.inner)))
        .join("\n");

    // Optional export of intermediate artifacts, driven by env vars.
    let export_path = std::env::var("APC_EXPORT_PATH").ok();
    let export_level = std::env::var("APC_EXPORT_LEVEL").ok();

    let apc = build::>(
        superblock.clone(),
        vm_config.clone(),
        degree_bound,
        ExportOptions::from_env_vars(export_path, export_level, &superblock.start_pcs()),
        &EmpiricalConstraints::default(),
    )
    .unwrap();

    let apc_with_stats = evaluate_apc::>(vm_config.instruction_handler, apc);
    let evaluation = apc_with_stats.evaluation_result();
    let apc = &apc_with_stats.apc().machine;

    format!(
        "Instructions:\n{superblock_str}\n\n{evaluation}\n\n{}",
        apc.render(&bus_map)
    )
}

/// Assert that the APC output for a superblock matches the expected snapshot.
///
/// - `snapshot_base_dir`: The base directory for snapshot files (typically
///   `Path::new(env!("CARGO_MANIFEST_DIR")).join("tests").join("apc_snapshots")`).
/// - `module_name`: Subdirectory within the snapshot dir (e.g., "single_instructions").
/// - `test_name`: Name of the test, used as the snapshot filename (without extension).
///
/// Set the `UPDATE_EXPECT=1` environment variable to update snapshot files.
pub fn assert_apc_snapshot(
    actual: &str,
    snapshot_base_dir: &Path,
    module_name: &str,
    test_name: &str,
) {
    // Snapshot file lives at <base>/<module>/<test_name>.txt.
    let expected_path = snapshot_base_dir
        .join(module_name)
        .join(format!("{test_name}.txt"));
    let should_update_expectation = std::env::var("UPDATE_EXPECT")
        .map(|v| v.as_str() == "1")
        .unwrap_or(false);

    // `None` if the snapshot file does not exist yet.
    let expected = expected_path
        .exists()
        .then(|| fs::read_to_string(&expected_path).unwrap());

    match (expected, should_update_expectation) {
        (Some(expected), _) if expected == actual => {
            // Test succeeded.
        }
        (Some(expected), false) => {
            // The expectation file exists, is different from "actual" and we are
            // not allowed to update it.
            pretty_assertions::assert_eq!(
                expected.trim(),
                actual.trim(),
                "The output of `{test_name}` does not match the expected output. \
                To overwrite the expected output with the currently generated one, \
                re-run the test with the environment variable `UPDATE_EXPECT=1` or \
                delete the file `{test_name}.txt`.",
            );
        }
        _ => {
            // Expectation file does not exist or is different from "actual" and we are allowed to update it.
            fs::create_dir_all(expected_path.parent().unwrap()).unwrap();
            fs::write(&expected_path, actual).unwrap();
            println!("Expected output for `{test_name}` was created. Re-run the test to confirm.");
        }
    }
}

/// Convenience function combining [`compile_apc`] and [`assert_apc_snapshot`].
pub fn assert_apc_machine_output(
    original_config: &OriginalVmConfig,
    program: SuperBlock>,
    snapshot_base_dir: &Path,
    module_name: &str,
    test_name: &str,
) {
    // Compile the superblock into its rendered APC report...
    // NOTE(review): the turbofish `compile_apc::(…)` lost its generic
    // argument during extraction — restore it from the repository.
    let actual = compile_apc::(original_config, program);
    // ...and compare it against (or create) the stored snapshot.
    assert_apc_snapshot(&actual, snapshot_base_dir, module_name, test_name);
}



================================================
FILE: openvm/src/trace_generation.rs
================================================
use crate::PowdrSdkCpu;
use crate::SpecializedConfigCpuBuilder;
use crate::{isa::OpenVmISA, program::CompiledProgram, SpecializedConfig};
use openvm_circuit::arch::{
    execution_mode::Segment, Executor, MeteredExecutor, PreflightExecutionOutput,
    PreflightExecutor, VirtualMachine, VmBuilder, VmCircuitConfig, VmExecutionConfig, VmInstance,
};
use openvm_native_circuit::NativeConfig;
use openvm_sdk::{
    config::{AppConfig, DEFAULT_APP_LOG_BLOWUP},
    prover::vm::new_local_prover,
    GenericSdk, StdIn,
};
use openvm_stark_backend::config::Val;
use openvm_stark_backend::{keygen::types::MultiStarkProvingKey, prover::types::ProvingContext};
use openvm_stark_sdk::{
    config::{
        baby_bear_poseidon2::BabyBearPoseidon2Engine as CpuBabyBearPoseidon2Engine, FriParameters,
    },
    engine::{StarkEngine, StarkFriEngine},
};
use tracing::info_span;

use crate::BabyBearSC;

// The `cuda` feature selects GPU implementations of the SDK, config builder
// and proving engine; otherwise the CPU counterparts are aliased in.
#[cfg(not(feature = "cuda"))]
use crate::PowdrSdkCpu as PowdrSdk;
#[cfg(feature = "cuda")]
use crate::PowdrSdkGpu as PowdrSdk;
#[cfg(not(feature = "cuda"))]
use crate::SpecializedConfigCpuBuilder as SpecializedConfigBuilder;
#[cfg(feature = "cuda")]
use crate::SpecializedConfigGpuBuilder as SpecializedConfigBuilder;
#[cfg(feature = "cuda")]
use openvm_cuda_backend::engine::GpuBabyBearPoseidon2Engine as BabyBearPoseidon2Engine;
#[cfg(not(feature = "cuda"))]
use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Engine;

/// Given a program and input, generates the trace segment by segment and calls the provided
/// callback with the VM, proving key, and proving context (containing the trace) for each
segment. pub fn do_with_trace( program: &CompiledProgram, inputs: StdIn, callback: impl FnMut( usize, &VirtualMachine>, &MultiStarkProvingKey, ProvingContext<::PB>, ), ) -> Result<(), Box> { let sdk = PowdrSdk::new(create_app_config(program))?; do_with_trace_with_sdk::, _>( program, inputs, sdk, callback, ) } /// Like [`do_with_trace`], but always uses the CPU engine and CPU VM config builder. pub fn do_with_cpu_trace( program: &CompiledProgram, inputs: StdIn, callback: impl FnMut( usize, &VirtualMachine>, &MultiStarkProvingKey, ProvingContext<::PB>, ), ) -> Result<(), Box> { let sdk = PowdrSdkCpu::new(create_app_config(program))?; do_with_trace_with_sdk::, _>( program, inputs, sdk, callback, ) } fn do_with_trace_with_sdk( program: &CompiledProgram, inputs: StdIn, sdk: GenericSdk, mut callback: impl FnMut( usize, &VirtualMachine, &MultiStarkProvingKey, ProvingContext<::PB>, ), ) -> Result<(), Box> where E: StarkFriEngine, VB: VmBuilder + Clone, >>::Executor: Executor> + MeteredExecutor> + PreflightExecutor, VB::RecordArena>, NB: VmBuilder + Clone, >>::Executor: PreflightExecutor, NB::RecordArena>, { let exe = sdk.convert_to_exe(program.exe.clone())?; // Build owned vm instance, so we can mutate it later let vm_builder = sdk.app_vm_builder().clone(); let vm_pk = sdk.app_pk().app_vm_pk.clone(); let mut vm_instance: VmInstance<_, _> = new_local_prover(vm_builder, &vm_pk, exe.clone())?; vm_instance.reset_state(inputs.clone()); let metered_ctx = vm_instance.vm.build_metered_ctx(&exe); let metered_interpreter = vm_instance.vm.metered_interpreter(vm_instance.exe())?; let (segments, _) = metered_interpreter.execute_metered(inputs.clone(), metered_ctx)?; let mut state = vm_instance.state_mut().take(); // Move `vm` and `interpreter` out of `vm_instance` // (after this, you can't use `vm_instance` anymore). let mut vm = vm_instance.vm; let mut interpreter = vm_instance.interpreter; // Get reusable inputs for `debug_proving_ctx`, the mock prover API from OVM. 
let air_inv = vm.config().create_airs()?; let pk = air_inv.keygen::(&vm.engine); for (seg_idx, segment) in segments.into_iter().enumerate() { let _segment_span = info_span!("prove_segment", segment = seg_idx).entered(); // We need a separate span so the metric label includes "segment" from _segment_span let _prove_span = info_span!("total_proof").entered(); let Segment { num_insns, trace_heights, .. } = segment; let from_state = Option::take(&mut state).unwrap(); vm.transport_init_memory_to_device(&from_state.memory); let PreflightExecutionOutput { system_records, record_arenas, to_state, } = vm.execute_preflight( &mut interpreter, from_state, Some(num_insns), &trace_heights, )?; state = Some(to_state); let ctx = vm.generate_proving_ctx(system_records, record_arenas)?; callback(seg_idx, &vm, &pk, ctx); } Ok(()) } fn create_app_config( program: &CompiledProgram, ) -> AppConfig> { let app_fri_params = FriParameters::standard_with_100_bits_conjectured_security(DEFAULT_APP_LOG_BLOWUP); AppConfig::new(app_fri_params, program.vm_config.clone()) } ================================================ FILE: openvm/src/utils.rs ================================================ use core::fmt; use std::{collections::BTreeMap, sync::Arc}; use itertools::Itertools; use openvm_stark_backend::{ air_builders::symbolic::{ symbolic_expression::SymbolicExpression, symbolic_variable::{Entry, SymbolicVariable}, SymbolicConstraints, }, interaction::{Interaction, SymbolicInteraction}, p3_field::PrimeField32, }; use powdr_autoprecompiles::{ expression::{try_convert, AlgebraicReference}, symbolic_machine::SymbolicBusInteraction, }; use powdr_expression::AlgebraicExpression; use crate::bus_map::BusMap; pub enum OpenVmReference { /// Reference to a witness column. The boolean indicates if the reference is to the next row. 
WitnessColumn(AlgebraicReference, bool),
    /// The built-in `is_first_row` row selector.
    IsFirstRow,
    /// The built-in `is_last_row` row selector.
    IsLastRow,
    /// The built-in `is_transition` row selector.
    IsTransition,
}

// Witness columns print their name, with a trailing `'` when the reference is
// to the next row; the built-in selectors print their fixed names.
impl fmt::Display for OpenVmReference {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            OpenVmReference::WitnessColumn(reference, next) => {
                write!(f, "{}{}", reference.name, if *next { "'" } else { "" })
            }
            OpenVmReference::IsFirstRow => write!(f, "is_first_row"),
            OpenVmReference::IsLastRow => write!(f, "is_last_row"),
            OpenVmReference::IsTransition => write!(f, "is_transition"),
        }
    }
}

/// An unsupported OpenVM reference appeared, i.e., a non-zero offset or a reference to
/// is_first_row, is_last_row, or is_transition.
#[derive(Debug)]
pub struct UnsupportedOpenVmReferenceError;

// Only current-row witness column references can be converted; next-row
// references and the built-in row selectors are rejected.
// NOTE(review): the `TryFrom` source type parameter appears to have been lost
// in extraction (`impl TryFrom for …`) — restore before compiling.
impl TryFrom for AlgebraicReference {
    type Error = UnsupportedOpenVmReferenceError;

    fn try_from(value: OpenVmReference) -> Result {
        match value {
            OpenVmReference::WitnessColumn(reference, false) => Ok(reference),
            _ => Err(UnsupportedOpenVmReferenceError),
        }
    }
}

/// Recursively converts an OpenVM `SymbolicExpression` into a powdr
/// `AlgebraicExpression`, resolving column names by index via `columns`.
pub fn symbolic_to_algebraic(
    expr: &SymbolicExpression,
    columns: &[Arc],
) -> AlgebraicExpression {
    match expr {
        SymbolicExpression::Constant(c) => AlgebraicExpression::Number(*c),
        SymbolicExpression::Add { x, y, .. } => {
            symbolic_to_algebraic(x, columns) + symbolic_to_algebraic(y, columns)
        }
        SymbolicExpression::Sub { x, y, .. } => {
            symbolic_to_algebraic(x, columns) - symbolic_to_algebraic(y, columns)
        }
        SymbolicExpression::Mul { x, y, .. } => {
            symbolic_to_algebraic(x, columns) * symbolic_to_algebraic(y, columns)
        }
        SymbolicExpression::Neg { x, .. } => -symbolic_to_algebraic(x, columns),
        SymbolicExpression::Variable(SymbolicVariable {
            entry,
            index,
            ..
}) => match entry { Entry::Main { offset, part_index } => { assert_eq!(*part_index, 0); let next = match *offset { 0 => false, 1 => true, _ => panic!("Unexpected offset: {offset}"), }; let name = columns.get(*index).unwrap_or_else(|| { panic!("Column index out of bounds: {index}\nColumns: {columns:?}"); }); AlgebraicExpression::Reference(OpenVmReference::WitnessColumn( AlgebraicReference { name: name.clone(), id: *index as u64, }, next, )) } _ => unimplemented!(), }, SymbolicExpression::IsFirstRow => { AlgebraicExpression::Reference(OpenVmReference::IsFirstRow) } SymbolicExpression::IsLastRow => AlgebraicExpression::Reference(OpenVmReference::IsLastRow), SymbolicExpression::IsTransition => { AlgebraicExpression::Reference(OpenVmReference::IsTransition) } } } pub fn openvm_bus_interaction_to_powdr( interaction: &SymbolicInteraction, columns: &[Arc], ) -> Result, UnsupportedOpenVmReferenceError> { let id = interaction.bus_index as u64; let mult = try_convert(symbolic_to_algebraic(&interaction.count, columns))?; let args = interaction .message .iter() .map(|e| try_convert(symbolic_to_algebraic(e, columns))) .collect::>()?; Ok(SymbolicBusInteraction { id, mult, args }) } pub fn get_pil( name: &str, constraints: &SymbolicConstraints, columns: &Vec>, public_values: Vec, bus_map: &BusMap, ) -> String { let mut pil = format!( " namespace {name}; // Preamble col fixed is_first_row = [1] + [0]*; col fixed is_last_row = [0] + [1]*; col fixed is_transition = [0] + [1]* + [0]; " ); pil.push_str( &bus_map .all_types_by_id() .iter() .map(|(id, bus_type)| format!(" let {bus_type} = {id};")) .join("\n"), ); pil.push_str( " // Witness columns ", ); // Declare witness columns for column in columns { pil.push_str(&format!(" col witness {column};\n")); } let (bus_interactions_by_bus, new_buses): (BTreeMap<_, _>, BTreeMap<_, _>) = constraints .interactions .iter() .map(|interaction| (interaction.bus_index, interaction)) .into_group_map() .into_iter() .partition::, _>(|(bus_index, _)| { 
bus_map.all_types_by_id().contains_key(&(*bus_index as u64)) }); pil.push_str( " // Bus interactions (bus_index, fields, count)\n", ); for (bus_index, interactions) in bus_interactions_by_bus { let bus_name = bus_map.bus_type(bus_index as u64).to_string(); for interaction in interactions { format_bus_interaction(&mut pil, interaction, columns, &public_values, &bus_name); } pil.push('\n'); } for (bus_index, interactions) in new_buses { let bus_name = format!("bus_{bus_index}"); for interaction in interactions { format_bus_interaction(&mut pil, interaction, columns, &public_values, &bus_name); } pil.push('\n'); } pil.push_str(" // Constraints\n"); for constraint in &constraints.constraints { pil.push_str(&format!( " {} = 0;\n", format_expr(constraint, columns, &public_values) )); } pil } fn format_bus_interaction( pil: &mut String, interaction: &Interaction>, columns: &[Arc], public_values: &[String], bus_name: &str, ) { let Interaction { message, count, .. } = interaction; // We do not know what is a send or a receive let function_name = "bus_interaction"; pil.push_str(&format!( " std::protocols::bus::{}({bus_name}, [{}], {});\n", function_name, message .iter() .map(|value| format_expr(value, columns, public_values)) .collect::>() .join(", "), format_expr(count, columns, public_values) )); } fn format_expr( expr: &SymbolicExpression, columns: &[Arc], // TODO: Implement public references _public_values: &[String], ) -> String { symbolic_to_algebraic(expr, columns).to_string() } ================================================ FILE: openvm-bus-interaction-handler/Cargo.toml ================================================ [package] name = "powdr-openvm-bus-interaction-handler" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [dependencies] powdr-autoprecompiles.workspace = true powdr-expression.workspace = true powdr-number.workspace = true powdr-constraint-solver.workspace = true 
itertools.workspace = true serde.workspace = true [lints] workspace = true [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: openvm-bus-interaction-handler/src/bitwise_lookup.rs ================================================ use powdr_autoprecompiles::range_constraint_optimizer::RangeConstraints; use powdr_constraint_solver::{ grouped_expression::GroupedExpression, range_constraint::RangeConstraint, }; use powdr_number::{FieldElement, LargeInt}; use super::byte_constraint; pub fn handle_bitwise_lookup( payload: &[RangeConstraint], ) -> Vec> { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/bitwise_op_lookup/bus.rs // Expects (x, y, z, op), where: // - if op == 0, x & y are bytes, z = 0 // - if op == 1, x & y are bytes, z = x ^ y let [x, y, _z, op] = payload else { panic!("Expected arguments (x, y, z, op)"); }; match op .try_to_single_value() .map(|v| v.to_integer().try_into_u64().unwrap()) { // Range constraint on x & y, z = 0 Some(0) => vec![ byte_constraint(), byte_constraint(), RangeConstraint::from_value(T::zero()), RangeConstraint::from_value(T::zero()), ], // z = x ^ y Some(1) => { if let (Some(x), Some(y)) = (x.try_to_single_value(), y.try_to_single_value()) { // Both inputs are known, can compute result concretely let z = T::from( x.to_integer().try_into_u64().unwrap() ^ y.to_integer().try_into_u64().unwrap(), ); vec![ RangeConstraint::from_value(x), RangeConstraint::from_value(y), RangeConstraint::from_value(z), RangeConstraint::from_value(T::one()), ] } else { // The result of an XOR can only be a byte and have bits set that are set in either x or y let z_constraint = RangeConstraint::from_mask(*x.mask() | *y.mask()) .conjunction(&byte_constraint()); vec![ byte_constraint(), byte_constraint(), z_constraint, RangeConstraint::from_value(T::one()), ] } } // Operation is unknown, but we know that x, y, and z are bytes // and that op is 
0 or 1 None => vec![ byte_constraint(), byte_constraint(), byte_constraint(), RangeConstraint::from_mask(0x1u64), ], _ => panic!("Invalid operation"), } } pub fn bitwise_lookup_pure_range_constraints( payload: &[GroupedExpression], ) -> Option> { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/bitwise_op_lookup/bus.rs // Expects (x, y, z, op), where: // - if op == 0, x & y are bytes, z = 0 // - if op == 1, x & y are bytes, z = x ^ y let [x, y, z, op] = payload else { panic!("Expected arguments (x, y, z, op)"); }; let byte_rc = RangeConstraint::from_mask(0xffu64); let zero_rc = RangeConstraint::from_value(T::zero()); if op.try_to_number() == Some(T::from(0u64)) { Some( [ (x.clone(), byte_rc), (y.clone(), byte_rc), (z.clone(), zero_rc), ] .into(), ) } else if x == y { // This is a common pattern, because the `BaseAluCoreChip` range-constraints // the output of an addition by sending each limb as both operands to the XOR table: // https://github.com/openvm-org/openvm/blob/v1.0.0/extensions/rv32im/circuit/src/base_alu/core.rs#L131-L138 // Note that this block also gets executed if `op` is unknown (but we know that `op` can only be 0 or 1). 
Some( [ (x.clone(), byte_rc), (z.clone(), zero_rc), (op.clone(), RangeConstraint::from_mask(1)), ] .into(), ) } else { None } } #[cfg(test)] mod tests { use crate::{bus_map::DEFAULT_BITWISE_LOOKUP, test_utils::*, OpenVmBusInteractionHandler}; use super::*; use powdr_constraint_solver::constraint_system::{BusInteraction, BusInteractionHandler}; use powdr_number::BabyBearField; fn run( x: RangeConstraint, y: RangeConstraint, z: RangeConstraint, op: RangeConstraint, ) -> Vec> { let handler = OpenVmBusInteractionHandler::::default(); let bus_interaction = BusInteraction { bus_id: RangeConstraint::from_value(DEFAULT_BITWISE_LOOKUP.into()), multiplicity: value(1), payload: vec![x, y, z, op], }; let result = handler.handle_bus_interaction(bus_interaction); result.payload } #[test] fn test_byte_constraint() { let result = run( Default::default(), Default::default(), Default::default(), value(0), ); assert_eq!(result.len(), 4); assert_eq!(result[0], mask(0xff)); assert_eq!(result[1], mask(0xff)); assert_eq!(result[2], value(0)); assert_eq!(result[3], value(0)); } #[test] fn test_xor_known() { let result = run( value(0b10101010), value(0b11001100), Default::default(), value(1), ); assert_eq!(result.len(), 4); assert_eq!(result[0], value(0b10101010)); assert_eq!(result[1], value(0b11001100)); assert_eq!(result[2], value(0b01100110)); assert_eq!(result[3], value(1)); } #[test] fn test_xor_unknown() { let result = run( Default::default(), Default::default(), Default::default(), value(1), ); assert_eq!(result.len(), 4); assert_eq!(result[0], mask(0xff)); assert_eq!(result[1], mask(0xff)); assert_eq!(result[2], mask(0xff)); assert_eq!(result[3], value(1)); } #[test] fn test_xor_one_unknown() { let result = run(mask(0xabcd), value(0), Default::default(), value(1)); assert_eq!(result.len(), 4); // Note that this constraint could be tighter (0xcd), but the solver // will get to this by intersecting the result with the input // constraints. 
assert_eq!(result[0], mask(0xff)); // Same here assert_eq!(result[1], mask(0xff)); // We won't be able to compute the result, but we know that the range // constraint of `x` also applies to `z`. assert_eq!(result[2], mask(0xcd)); assert_eq!(result[3], value(1)); } #[test] fn test_unknown_operation() { let result = run( Default::default(), Default::default(), Default::default(), Default::default(), ); assert_eq!(result.len(), 4); assert_eq!(result[0], mask(0xff)); assert_eq!(result[1], mask(0xff)); assert_eq!(result[2], mask(0xff)); assert_eq!(result[3], mask(0x1)); } } ================================================ FILE: openvm-bus-interaction-handler/src/bus_map.rs ================================================ //! To support an abstracted autoprecompile layer, this module stores type implementations specific to OpenVM use std::fmt::Display; use powdr_autoprecompiles::bus_map::BusType; use serde::{Deserialize, Serialize}; use crate::DEFAULT_RANGE_TUPLE_CHECKER_SIZES; pub const DEFAULT_EXECUTION_BRIDGE: u64 = 0; pub const DEFAULT_MEMORY: u64 = 1; pub const DEFAULT_PC_LOOKUP: u64 = 2; pub const DEFAULT_VARIABLE_RANGE_CHECKER: u64 = 3; pub const DEFAULT_BITWISE_LOOKUP: u64 = 6; pub const DEFAULT_TUPLE_RANGE_CHECKER: u64 = 7; #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum OpenVmBusType { VariableRangeChecker, TupleRangeChecker([u32; 2]), BitwiseLookup, } pub type BusMap = powdr_autoprecompiles::bus_map::BusMap; impl Display for OpenVmBusType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { OpenVmBusType::VariableRangeChecker => write!(f, "VARIABLE_RANGE_CHECKER"), OpenVmBusType::TupleRangeChecker(sizes) => { write!(f, "TUPLE_RANGE_CHECKER_{}_{}", sizes[0], sizes[1]) } OpenVmBusType::BitwiseLookup => write!(f, "BITWISE_LOOKUP"), } } } pub fn default_openvm_bus_map() -> BusMap { let bus_ids = [ (DEFAULT_EXECUTION_BRIDGE, BusType::ExecutionBridge), (DEFAULT_MEMORY, BusType::Memory), (DEFAULT_PC_LOOKUP, 
BusType::PcLookup), ( DEFAULT_VARIABLE_RANGE_CHECKER, BusType::Other(OpenVmBusType::VariableRangeChecker), ), ( DEFAULT_BITWISE_LOOKUP, BusType::Other(OpenVmBusType::BitwiseLookup), ), ( DEFAULT_TUPLE_RANGE_CHECKER, BusType::Other(OpenVmBusType::TupleRangeChecker( DEFAULT_RANGE_TUPLE_CHECKER_SIZES, )), ), ]; BusMap::from_id_type_pairs(bus_ids) } ================================================ FILE: openvm-bus-interaction-handler/src/lib.rs ================================================ use std::fmt::Display; use bitwise_lookup::handle_bitwise_lookup; use itertools::Itertools; use memory::handle_memory; use powdr_autoprecompiles::{ bus_map::BusType, constraint_optimizer::IsBusStateful, range_constraint_optimizer::{ utils::{filter_byte_constraints, range_constraint_to_num_bits}, MakeRangeConstraintsError, RangeConstraintHandler, RangeConstraints, }, }; use powdr_constraint_solver::{ constraint_system::{BusInteraction, BusInteractionHandler}, grouped_expression::GroupedExpression, range_constraint::RangeConstraint, }; use powdr_number::{FieldElement, LargeInt}; use std::hash::Hash; use variable_range_checker::handle_variable_range_checker; use crate::{ bitwise_lookup::bitwise_lookup_pure_range_constraints, bus_map::{default_openvm_bus_map, BusMap, OpenVmBusType}, tuple_range_checker::TupleRangeCheckerHandler, variable_range_checker::variable_range_checker_pure_range_constraints, }; mod bitwise_lookup; pub mod bus_map; mod memory; pub mod memory_bus_interaction; mod tuple_range_checker; mod variable_range_checker; #[derive(Clone)] pub struct OpenVmBusInteractionHandler { bus_map: BusMap, _phantom: std::marker::PhantomData, } /// Taken from openvm implementation, should be kept in sync. 
const DEFAULT_RANGE_TUPLE_CHECKER_SIZES: [u32; 2] = [1 << 8, 8 * (1 << 8)]; impl Default for OpenVmBusInteractionHandler { fn default() -> Self { Self::new(default_openvm_bus_map()) } } impl OpenVmBusInteractionHandler { pub fn new(bus_map: BusMap) -> Self { Self { bus_map, _phantom: std::marker::PhantomData, } } pub fn tuple_range_checker_sizes(&self) -> [u32; 2] { self.bus_map .all_types_by_id() .values() .find_map(|ty| { if let BusType::Other(OpenVmBusType::TupleRangeChecker(sizes)) = ty { Some(*sizes) } else { None } }) .unwrap() } } impl BusInteractionHandler for OpenVmBusInteractionHandler { fn handle_bus_interaction( &self, bus_interaction: BusInteraction>, ) -> BusInteraction> { let (Some(bus_id), Some(multiplicity)) = ( bus_interaction.bus_id.try_to_single_value(), bus_interaction.multiplicity.try_to_single_value(), ) else { return bus_interaction; }; if multiplicity.is_zero() { return bus_interaction; } let payload_constraints = match self .bus_map .bus_type(bus_id.to_integer().try_into_u64().unwrap()) { // Sends / receives (pc, timestamp) pairs. They could have any value. BusType::ExecutionBridge => bus_interaction.payload, // Sends a (pc, opcode, args..) tuple. In theory, we could refine the range constraints // of the args here, but for auto-precompiles, only the PC will be unknown, which could // have any value. 
BusType::PcLookup => bus_interaction.payload, BusType::Other(OpenVmBusType::BitwiseLookup) => { handle_bitwise_lookup(&bus_interaction.payload) } BusType::Memory => handle_memory(&bus_interaction.payload, multiplicity), BusType::Other(OpenVmBusType::VariableRangeChecker) => { handle_variable_range_checker(&bus_interaction.payload) } BusType::Other(OpenVmBusType::TupleRangeChecker(sizes)) => { TupleRangeCheckerHandler::new(sizes) .handle_bus_interaction(&bus_interaction.payload) } }; BusInteraction { payload: payload_constraints, ..bus_interaction } } } fn byte_constraint() -> RangeConstraint { RangeConstraint::from_mask(0xffu64) } impl IsBusStateful for OpenVmBusInteractionHandler { fn is_stateful(&self, bus_id: T) -> bool { let bus_id = bus_id.to_integer().try_into_u64().unwrap(); match self.bus_map.bus_type(bus_id) { BusType::ExecutionBridge => true, BusType::Memory => true, BusType::PcLookup => false, BusType::Other(OpenVmBusType::BitwiseLookup) => false, BusType::Other(OpenVmBusType::VariableRangeChecker) => false, BusType::Other(OpenVmBusType::TupleRangeChecker(_)) => false, } } } impl RangeConstraintHandler for OpenVmBusInteractionHandler { fn pure_range_constraints( &self, bus_interaction: &BusInteraction>, ) -> Option> { let bus_id = bus_interaction .bus_id .try_to_number() .unwrap() .to_integer() .try_into_u64() .unwrap(); match self.bus_map.bus_type(bus_id) { BusType::ExecutionBridge | BusType::Memory | BusType::PcLookup => None, BusType::Other(OpenVmBusType::BitwiseLookup) => { bitwise_lookup_pure_range_constraints(&bus_interaction.payload) } BusType::Other(OpenVmBusType::VariableRangeChecker) => { variable_range_checker_pure_range_constraints(&bus_interaction.payload) } BusType::Other(OpenVmBusType::TupleRangeChecker(sizes)) => { TupleRangeCheckerHandler::new(sizes) .pure_range_constraints(&bus_interaction.payload) } } } fn batch_make_range_constraints( &self, mut range_constraints: RangeConstraints, ) -> Result>>, MakeRangeConstraintsError> { let mut 
byte_constraints = filter_byte_constraints(&mut range_constraints); let tuple_range_checker_sizes = self.tuple_range_checker_sizes(); let tuple_range_checker_ranges = TupleRangeCheckerHandler::new(tuple_range_checker_sizes).tuple_range_checker_ranges(); assert_eq!( tuple_range_checker_ranges.0, RangeConstraint::from_mask(0xffu64), ); // The tuple range checker bus can range-check two expressions at the same time. // We assume the first range is a byte range (see assertion above). From the remaining // range constraints, we find all that happen to require the second range and zip them // with the byte constraints. let (mut tuple_range_checker_second_args, mut range_constraints): (Vec<_>, Vec<_>) = range_constraints .into_iter() .partition(|(_expr, rc)| rc == &tuple_range_checker_ranges.1); if tuple_range_checker_second_args.len() > byte_constraints.len() { range_constraints .extend(tuple_range_checker_second_args.drain(byte_constraints.len()..)); } let num_variable_range_checker_interactions = tuple_range_checker_second_args.len(); let tuple_range_checker_constraints = byte_constraints .drain(..num_variable_range_checker_interactions) .zip_eq(tuple_range_checker_second_args) .map(|(byte_expr, (expr2, _rc))| { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/range_tuple/bus.rs // Expects (x, y), where `x` is in the range [0, MAX_0] and `y` is in the range [0, MAX_1] let bus_id = self .bus_map .get_bus_id(&BusType::Other(OpenVmBusType::TupleRangeChecker( tuple_range_checker_sizes, ))) .unwrap(); BusInteraction { bus_id: GroupedExpression::from_number(T::from(bus_id)), multiplicity: GroupedExpression::from_number(T::one()), payload: vec![byte_expr.clone(), expr2.clone()], } }) .collect::>(); let byte_constraints = byte_constraints .into_iter() .chunks(2) .into_iter() .map(|mut bytes| { // Use the bitwise lookup to range-check two bytes at the same time: // See: 
https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/bitwise_op_lookup/bus.rs // Expects (x, y, z, op), where: // - if op == 0, x & y are bytes, z = 0 // - if op == 1, x & y are bytes, z = x ^ y let byte1 = bytes.next().unwrap(); let byte2 = bytes .next() .unwrap_or(GroupedExpression::from_number(T::zero())); let bus_id = self .bus_map .get_bus_id(&BusType::Other(OpenVmBusType::BitwiseLookup)) .unwrap(); BusInteraction { bus_id: GroupedExpression::from_number(T::from(bus_id)), multiplicity: GroupedExpression::from_number(T::one()), payload: vec![ byte1.clone(), byte2.clone(), GroupedExpression::from_number(T::zero()), GroupedExpression::from_number(T::zero()), ], } }) .collect::>(); let other_constraints = range_constraints .into_iter() .map(|(expr, rc)| { // Use the variable range checker to range-check expressions: // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/var_range/bus.rs // Expects (x, bits), where `x` is in the range [0, 2^bits - 1] let Some(num_bits) = range_constraint_to_num_bits(&rc) else { return Err(MakeRangeConstraintsError(format!( "Failed to get number of bits from range constraint: {rc:?}" ))); }; let bus_id = self .bus_map .get_bus_id(&BusType::Other(OpenVmBusType::VariableRangeChecker)) .unwrap(); Ok(BusInteraction { bus_id: GroupedExpression::from_number(T::from(bus_id)), multiplicity: GroupedExpression::from_number(T::one()), payload: vec![ expr, GroupedExpression::from_number(T::from(num_bits as u64)), ], }) }) .collect::, _>>()?; Ok(tuple_range_checker_constraints .into_iter() .chain(byte_constraints) .chain(other_constraints) .collect::>()) } } #[cfg(test)] mod test_utils { use super::*; use powdr_number::BabyBearField; pub fn value(value: u64) -> RangeConstraint { RangeConstraint::from_value(BabyBearField::from(value)) } pub fn mask(mask: u64) -> RangeConstraint { RangeConstraint::from_mask(mask) } pub fn range(start: u64, end: u64) -> RangeConstraint { 
RangeConstraint::from_range(BabyBearField::from(start), BabyBearField::from(end)) } } ================================================ FILE: openvm-bus-interaction-handler/src/memory.rs ================================================ use powdr_constraint_solver::range_constraint::RangeConstraint; use powdr_number::{FieldElement, LargeInt}; use super::byte_constraint; /// Taken from the openvm implementation, should be kept in sync. pub const RV32_REGISTER_AS: u32 = 1; /// Taken from the openvm implementation, should be kept in sync. pub const RV32_MEMORY_AS: u32 = 2; pub fn handle_memory( payload: &[RangeConstraint], multiplicity: T, ) -> Vec> { // See: https://github.com/openvm-org/openvm/blob/main/crates/vm/src/system/memory/offline_checker/bus.rs // Expects (address_space, pointer, data, timestamp). let [address_space, pointer, data @ .., timestamp] = payload else { panic!(); }; assert!(!data.is_empty(), "Data must contain at least one element"); if multiplicity != -T::one() { // The interaction is not a receive, we can't make assumptions about the ranges. return payload.to_vec(); } let address_space_value = address_space .try_to_single_value() .map(|v| v.to_integer().try_into_u32().unwrap()); match address_space_value { Some(RV32_REGISTER_AS | RV32_MEMORY_AS) => { let data = if address_space_value == Some(RV32_REGISTER_AS) && pointer.try_to_single_value() == Some(T::zero()) { // By the assumption that x0 is never written to, we know the result. data.iter() .map(|_| RangeConstraint::from_value(T::zero())) .collect::>() } else { // By the assumption that all data written to registers or memory are range-checked, // we can return a byte range constraint for the data. 
data.iter().map(|_| byte_constraint()).collect::>() }; [*address_space, *pointer] .into_iter() .chain(data) .chain(std::iter::once(*timestamp)) .collect() } // Otherwise, we can't improve the constraints _ => payload.to_vec(), } } #[cfg(test)] mod tests { use crate::{bus_map::DEFAULT_MEMORY, test_utils::*, OpenVmBusInteractionHandler}; use super::*; use powdr_constraint_solver::constraint_system::{BusInteraction, BusInteractionHandler}; use powdr_number::BabyBearField; fn run( address_space: RangeConstraint, pointer: RangeConstraint, data: Vec>, timestamp: RangeConstraint, multiplicity: BabyBearField, ) -> Vec> { let handler = OpenVmBusInteractionHandler::::default(); let bus_interaction = BusInteraction { bus_id: RangeConstraint::from_value(DEFAULT_MEMORY.into()), multiplicity: RangeConstraint::from_value(multiplicity), payload: std::iter::once(address_space) .chain(std::iter::once(pointer)) .chain(data) .chain(std::iter::once(timestamp)) .collect(), }; let result = handler.handle_bus_interaction(bus_interaction); result.payload } #[test] fn test_receive() { let address_space = value(RV32_MEMORY_AS as u64); let pointer = value(0x1234); let data = vec![Default::default(); 4]; let timestamp = value(0x5678); let result = run( address_space, pointer, data, timestamp, -(BabyBearField::from(1)), ); assert_eq!(result.len(), 7); assert_eq!(result[0], value(RV32_MEMORY_AS as u64)); assert_eq!(result[1], value(0x1234)); assert_eq!(result[2], byte_constraint()); assert_eq!(result[3], byte_constraint()); assert_eq!(result[4], byte_constraint()); assert_eq!(result[5], byte_constraint()); assert_eq!(result[6], value(0x5678)); } #[test] fn test_send() { let address_space = value(RV32_MEMORY_AS as u64); let pointer = value(0x1234); let data = vec![Default::default(); 4]; let timestamp = value(0x5678); let result = run(address_space, pointer, data, timestamp, 1.into()); assert_eq!(result.len(), 7); assert_eq!(result[0], value(RV32_MEMORY_AS as u64)); assert_eq!(result[1], 
value(0x1234));
        // For sends (positive multiplicity), the range constraints should not
        // be modified — `handle_memory` only refines the constraints for
        // receives (multiplicity -1), as exercised by `test_receive` above.
        assert_eq!(result[2], Default::default());
        assert_eq!(result[3], Default::default());
        assert_eq!(result[4], Default::default());
        assert_eq!(result[5], Default::default());
        assert_eq!(result[6], value(0x5678));
    }
}



================================================
FILE: openvm-bus-interaction-handler/src/memory_bus_interaction.rs
================================================
use std::hash::Hash;
use std::{array::IntoIter, fmt::Display};

use powdr_autoprecompiles::memory_optimizer::{
    MemoryBusInteraction, MemoryBusInteractionConversionError, MemoryOp,
};
use powdr_constraint_solver::{
    constraint_system::BusInteraction, grouped_expression::GroupedExpression,
};
use powdr_number::FieldElement;

/// The memory address space for register memory operations.
pub const REGISTER_ADDRESS_SPACE: u32 = 1;

// A parsed memory bus interaction: operation kind, address, data limbs and
// timestamp limbs.
// NOTE(review): generic parameters seem to have been stripped during
// extraction (`Vec>`) — restore before compiling.
#[derive(Clone, Debug)]
pub struct OpenVmMemoryBusInteraction {
    op: MemoryOp,
    address: OpenVmAddress,
    data: Vec>,
    timestamp: Vec>,
}

#[derive(Clone, Hash, Eq, PartialEq, Debug)]
pub struct OpenVmAddress {
    /// The address space (e.g. register, memory, native, etc.), always a concrete number.
    address_space: T,
    /// The address expression.
local_address: GroupedExpression, } impl IntoIterator for OpenVmAddress { type Item = GroupedExpression; type IntoIter = IntoIter, 2>; fn into_iter(self) -> Self::IntoIter { [ GroupedExpression::from_number(self.address_space), self.local_address, ] .into_iter() } } impl MemoryBusInteraction for OpenVmMemoryBusInteraction { type Address = OpenVmAddress; fn try_from_bus_interaction( bus_interaction: &BusInteraction>, memory_bus_id: u64, ) -> Result, MemoryBusInteractionConversionError> { match bus_interaction.bus_id.try_to_number() { None => return Err(MemoryBusInteractionConversionError), Some(id) if id == memory_bus_id.into() => {} Some(_) => return Ok(None), } let op = match bus_interaction.multiplicity.try_to_number() { Some(n) if n == 1.into() => MemoryOp::SetNew, Some(n) if n == (-1).into() => MemoryOp::GetPrevious, _ => return Err(MemoryBusInteractionConversionError), }; let [address_space, addr, data @ .., timestamp] = &bus_interaction.payload[..] else { panic!(); }; let Some(address_space) = address_space.try_to_number() else { panic!("Address space must be known!"); }; let address = OpenVmAddress { address_space, local_address: addr.clone(), }; Ok(Some(OpenVmMemoryBusInteraction { op, address, data: data.to_vec(), timestamp: vec![timestamp.clone()], })) } fn addr(&self) -> Self::Address { self.address.clone() } fn data(&self) -> &[GroupedExpression] { &self.data } fn timestamp_limbs(&self) -> &[GroupedExpression] { &self.timestamp } fn op(&self) -> MemoryOp { self.op } } ================================================ FILE: openvm-bus-interaction-handler/src/tuple_range_checker.rs ================================================ use powdr_autoprecompiles::range_constraint_optimizer::RangeConstraints; use powdr_constraint_solver::{ grouped_expression::GroupedExpression, range_constraint::RangeConstraint, }; use powdr_number::FieldElement; #[derive(Clone)] pub struct TupleRangeCheckerHandler { range_tuple_checker_sizes: [u32; 2], } impl 
TupleRangeCheckerHandler { pub fn new(range_tuple_checker_sizes: [u32; 2]) -> Self { Self { range_tuple_checker_sizes, } } pub fn tuple_range_checker_ranges( &self, ) -> (RangeConstraint, RangeConstraint) { ( RangeConstraint::from_range(T::zero(), T::from(self.range_tuple_checker_sizes[0] - 1)), RangeConstraint::from_range(T::zero(), T::from(self.range_tuple_checker_sizes[1] - 1)), ) } pub fn handle_bus_interaction( &self, payload: &[RangeConstraint], ) -> Vec> { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/range_tuple/bus.rs // Expects (x, y), where `x` is in the range [0, MAX_0] and `y` is in the range [0, MAX_1] let [_x, _y] = payload else { panic!("Expected arguments (x, y)"); }; let (x_rc, y_rc) = self.tuple_range_checker_ranges(); vec![x_rc, y_rc] } pub fn pure_range_constraints( &self, payload: &[GroupedExpression], ) -> Option> { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/range_tuple/bus.rs // Expects (x, y), where `x` is in the range [0, MAX_0] and `y` is in the range [0, MAX_1] let [x, y] = payload else { panic!("Expected arguments (x, y)"); }; let (x_rc, y_rc) = self.tuple_range_checker_ranges(); Some([(x.clone(), x_rc), (y.clone(), y_rc)].into()) } } #[cfg(test)] mod tests { use crate::{ bus_map::DEFAULT_TUPLE_RANGE_CHECKER, test_utils::value, OpenVmBusInteractionHandler, }; use super::*; use powdr_constraint_solver::constraint_system::{BusInteraction, BusInteractionHandler}; use powdr_number::BabyBearField; fn run( x: RangeConstraint, y: RangeConstraint, ) -> Vec> { let handler = OpenVmBusInteractionHandler::::default(); let bus_interaction = BusInteraction { bus_id: RangeConstraint::from_value(DEFAULT_TUPLE_RANGE_CHECKER.into()), multiplicity: value(1), payload: vec![x, y], }; let result = handler.handle_bus_interaction(bus_interaction); result.payload } #[test] fn test_unknown() { let x = Default::default(); let y = Default::default(); let result = run(x, y); 
assert_eq!(result.len(), 2); let (x_rc, y_rc) = ( RangeConstraint::from_range(BabyBearField::from(0), BabyBearField::from(255)), RangeConstraint::from_range( BabyBearField::from(0), BabyBearField::from(8 * (1 << 8) - 1), ), ); assert_eq!(result[0], x_rc); assert_eq!(result[1], y_rc); } } ================================================ FILE: openvm-bus-interaction-handler/src/variable_range_checker.rs ================================================ use powdr_autoprecompiles::range_constraint_optimizer::RangeConstraints; use powdr_constraint_solver::{ grouped_expression::GroupedExpression, range_constraint::RangeConstraint, }; use powdr_number::{FieldElement, LargeInt}; /// The maximum number of bits that can be checked by the variable range checker. // TODO: This should be configurable const MAX_BITS: u64 = 25; /// Implements [BusInteractionHandler::handle_bus_interaction] for the variable range checker bus, /// tightening the currently known range constraints. pub fn handle_variable_range_checker( payload: &[RangeConstraint], ) -> Vec> { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/var_range/bus.rs // Expects (x, bits), where `x` is in the range [0, 2^bits - 1] let [_x, bits] = payload else { panic!("Expected arguments (x, bits)"); }; match bits.try_to_single_value() { Some(bits_value) if bits_value.to_degree() <= MAX_BITS => { let bits_value = bits_value.to_integer().try_into_u64().unwrap(); let mask = (1u64 << bits_value) - 1; vec![RangeConstraint::from_mask(mask), *bits] } _ => { vec![ RangeConstraint::from_mask((1u64 << MAX_BITS) - 1), RangeConstraint::from_range(T::from(0), T::from(MAX_BITS)), ] } } } pub fn variable_range_checker_pure_range_constraints( payload: &[GroupedExpression], ) -> Option> { // See: https://github.com/openvm-org/openvm/blob/v1.0.0/crates/circuits/primitives/src/var_range/bus.rs // Expects (x, bits), where `x` is in the range [0, 2^bits - 1] let [x, bits] = payload else { panic!("Expected 
arguments (x, bits)"); }; bits.try_to_number().map(|bits| { [( x.clone(), RangeConstraint::from_mask((1u64 << bits.to_degree()) - 1), )] .into() }) } #[cfg(test)] mod tests { use crate::{ bus_map::DEFAULT_VARIABLE_RANGE_CHECKER, test_utils::{mask, range, value}, OpenVmBusInteractionHandler, }; use super::*; use powdr_constraint_solver::constraint_system::{BusInteraction, BusInteractionHandler}; use powdr_number::BabyBearField; fn run( x: RangeConstraint, bits: RangeConstraint, ) -> Vec> { let handler = OpenVmBusInteractionHandler::::default(); let bus_interaction = BusInteraction { bus_id: RangeConstraint::from_value(DEFAULT_VARIABLE_RANGE_CHECKER.into()), multiplicity: value(1), payload: vec![x, bits], }; let result = handler.handle_bus_interaction(bus_interaction); result.payload } #[test] fn test_unknown_bits() { let x = Default::default(); let bits = Default::default(); let result = run(x, bits); assert_eq!(result.len(), 2); assert_eq!( result[0], RangeConstraint::from_mask((1u64 << MAX_BITS) - 1) ); assert_eq!(result[1], range(0, MAX_BITS)); } #[test] fn test_known_bits() { let x = Default::default(); let bits = value(12); let result = run(x, bits); assert_eq!(result.len(), 2); assert_eq!(result[0], mask(0xfff)); assert_eq!(result[1], value(12)); } } ================================================ FILE: openvm-riscv/.gitignore ================================================ # Generated by Cargo # will have compiled files and executables debug/ target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html #Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk # MSVC Windows builds of rustc generate these, which store debugging information *.pdb # RustRover # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at 
https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ guest/openvm guest-keccak/target ================================================ FILE: openvm-riscv/Cargo.toml ================================================ [package] name = "powdr-openvm-riscv" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [features] default = [] aot = ["powdr-openvm/aot", "powdr-openvm-riscv-hints-circuit/aot"] tco = ["powdr-openvm/tco", "powdr-openvm-riscv-hints-circuit/tco"] metrics = ["powdr-openvm/metrics"] cuda = ["powdr-openvm/cuda"] [dependencies] openvm.workspace = true openvm-build.workspace = true openvm-rv32im-circuit.workspace = true openvm-rv32im-transpiler.workspace = true openvm-rv32im-guest.workspace = true openvm-transpiler.workspace = true openvm-circuit.workspace = true openvm-circuit-derive.workspace = true openvm-circuit-primitives.workspace = true openvm-circuit-primitives-derive.workspace = true openvm-instructions.workspace = true openvm-instructions-derive.workspace = true openvm-sdk.workspace = true openvm-ecc-circuit.workspace = true openvm-ecc-transpiler.workspace = true openvm-keccak256-circuit.workspace = true openvm-keccak256-transpiler.workspace = true openvm-sha256-circuit.workspace = true openvm-sha256-transpiler.workspace = true openvm-algebra-circuit.workspace = true openvm-algebra-transpiler.workspace = true openvm-bigint-circuit.workspace = true openvm-bigint-transpiler.workspace = true openvm-pairing-circuit.workspace = true openvm-pairing-transpiler.workspace = true openvm-native-circuit.workspace = true openvm-native-recursion.workspace = true openvm-stark-sdk.workspace = true openvm-stark-backend.workspace = true powdr-expression.workspace = true 
powdr-number.workspace = true powdr-riscv-elf.workspace = true powdr-autoprecompiles.workspace = true powdr-constraint-solver.workspace = true powdr-openvm-bus-interaction-handler.workspace = true powdr-openvm.workspace = true powdr-openvm-riscv-hints-transpiler.workspace = true powdr-openvm-riscv-hints-circuit.workspace = true eyre.workspace = true serde.workspace = true derive_more.workspace = true itertools.workspace = true tracing.workspace = true tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] } clap = { version = "^4.3", features = ["derive"] } log.workspace = true struct-reflection = { git = "https://github.com/gzanitti/struct-reflection-rs.git" } metrics.workspace = true toml = "0.8.14" rustc-demangle = "0.1.25" cfg-if = "1.0.0" [dev-dependencies] powdr-openvm = { workspace = true, features = ["test-utils"] } test-log.workspace = true tempfile = "3.20.0" pretty_assertions.workspace = true openvm-ecc-circuit.workspace = true openvm-algebra-circuit.workspace = true openvm-bigint-circuit.workspace = true openvm-pairing-circuit.workspace = true openvm-pairing-transpiler.workspace = true expect-test = "1.5.1" tracing-log = "0.2.0" [build-dependencies] openvm-cuda-builder = { workspace = true, optional = true } [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: openvm-riscv/extensions/hints-circuit/Cargo.toml ================================================ [package] name = "powdr-openvm-riscv-hints-circuit" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [features] default = [] aot = ["openvm-circuit/aot", "openvm-rv32im-circuit/aot"] tco = ["openvm-circuit/tco", "openvm-rv32im-circuit/tco"] [dependencies] openvm-circuit = { workspace = true } openvm-instructions = { workspace = true } openvm-rv32im-circuit = { workspace = true } openvm-stark-backend = { workspace = true } 
openvm-stark-sdk = { workspace = true } powdr-openvm-riscv-hints-transpiler = { workspace = true } eyre.workspace = true crypto-bigint = "0.6.1" elliptic-curve = "0.13.8" rand = { version = "0.8.5", default-features = false } serde.workspace = true ================================================ FILE: openvm-riscv/extensions/hints-circuit/src/executors.rs ================================================ use openvm_circuit::arch::{PhantomSubExecutor, Streams}; use openvm_circuit::system::memory::online::GuestMemory; use openvm_instructions::riscv::RV32_MEMORY_AS; use openvm_instructions::PhantomDiscriminant; use openvm_rv32im_circuit::adapters::read_rv32_register; use openvm_stark_backend::p3_field::PrimeField32; use rand::rngs::StdRng; use crate::field10x26_k256; /// Example hint implementation. /// Takes a single u32 as input and sets the hint to be the bytes of the u32 in reverse order. pub struct ReverseBytesSubEx; impl PhantomSubExecutor for ReverseBytesSubEx { fn phantom_execute( &self, memory: &GuestMemory, streams: &mut Streams, _: &mut StdRng, _: PhantomDiscriminant, a: u32, _: u32, c_upper: u16, ) -> eyre::Result<()> { assert_eq!(c_upper, 0); // read register let rs1 = read_rv32_register(memory, a); // read memory let bytes = unsafe { memory.read::(RV32_MEMORY_AS, rs1) }; // write hint as bytes in reverse let hint_bytes = bytes .into_iter() .rev() .map(|b| F::from_canonical_u8(b)) .collect(); streams.hint_stream = hint_bytes; Ok(()) } } /// Takes as input a pointer to 32 bytes, the SEC1 encoding (i.e., big-endian) of a k256 coordinate field element. /// Sets the hint to be the inverse of the field element in the same encoding (if not zero). /// Sets the hint to zero when the input is zero. 
pub struct K256InverseFieldSubEx; use crypto_bigint::const_monty_form; use crypto_bigint::impl_modulus; use crypto_bigint::modular::ConstMontyParams; use crypto_bigint::Encoding; use crypto_bigint::Zero; use crypto_bigint::U256; impl_modulus!( K256Mod, U256, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F" ); impl PhantomSubExecutor for K256InverseFieldSubEx { fn phantom_execute( &self, memory: &GuestMemory, streams: &mut Streams, _: &mut StdRng, _: PhantomDiscriminant, a: u32, _: u32, c_upper: u16, ) -> eyre::Result<()> { assert_eq!(c_upper, 0); // read register let rs1 = read_rv32_register(memory, a); // read the field element let bytes: [u8; 32] = unsafe { memory.read::(RV32_MEMORY_AS, rs1) }; let n = U256::from_be_bytes(bytes); // perform the inverse. let n_mod = const_monty_form!(n, K256Mod); let n_inv = if !(bool::from(n_mod.is_zero())) { n_mod.inv().unwrap().retrieve() } else { U256::ZERO }; let inv_bytes = n_inv .to_be_bytes() .into_iter() .map(|b| F::from_canonical_u8(b)) .collect(); streams.hint_stream = inv_bytes; Ok(()) } } /// Size in bytes of the k256 field element in 10x26 representation. const FIELD10X26_BYTES: usize = 40; // [u32;10] /// Takes as input a pointer to the inner representation of a k256 coordinate field element (in 32-bit architectures). /// Sets the hint to be the inverse of the input (if not zero), in the same representation. /// If the input is zero (normalized or not), the hint is also set, but undefined. 
pub struct K256InverseField10x26SubEx; impl PhantomSubExecutor for K256InverseField10x26SubEx { fn phantom_execute( &self, memory: &GuestMemory, streams: &mut Streams, _: &mut StdRng, _: PhantomDiscriminant, a: u32, _: u32, c_upper: u16, ) -> eyre::Result<()> { assert_eq!(c_upper, 0); // read register let rs1 = read_rv32_register(memory, a); // read the k256 field_10x26 as raw bytes let bytes: [u8; FIELD10X26_BYTES] = unsafe { memory.read::(RV32_MEMORY_AS, rs1) }; // we just reinterpret the bytes as a k256 field element. We don't use mem::transmute to avoid alignment issues let mut elem = [0u32; 10]; unsafe { std::ptr::copy_nonoverlapping( bytes.as_ptr(), elem.as_mut_ptr() as *mut u8, FIELD10X26_BYTES, ); } let elem = field10x26_k256::FieldElement10x26(elem); let inv = elem.invert().normalize(); // okay to transmute in the opposite direction let inv_bytes: [u8; FIELD10X26_BYTES] = unsafe { std::mem::transmute(inv.0) }; streams.hint_stream = inv_bytes .into_iter() .map(|b| F::from_canonical_u8(b)) .collect(); Ok(()) } } /// Pre-defined non-quadratic residue for k256. /// The same value should be used by the guest to check the non-square case. const K256_NON_QUADRATIC_RESIDUE: field10x26_k256::FieldElement10x26 = field10x26_k256::FieldElement10x26([3, 0, 0, 0, 0, 0, 0, 0, 0, 0]); /// Takes as input a pointer to the inner representation of a k256 coordinate field element (in 32-bit architectures). /// If the number is square, sets the hint an u32 of value one, followed by a square root in the same inner representation. /// If the number is not square, sets the hint to an u32 of value zero. 
pub struct K256SqrtField10x26SubEx; impl PhantomSubExecutor for K256SqrtField10x26SubEx { fn phantom_execute( &self, memory: &GuestMemory, streams: &mut Streams, _: &mut StdRng, _: PhantomDiscriminant, a: u32, _: u32, c_upper: u16, ) -> eyre::Result<()> { assert_eq!(c_upper, 0); // read register let rs1 = read_rv32_register(memory, a); // read the k256 field_10x26 as raw bytes let bytes: [u8; FIELD10X26_BYTES] = unsafe { memory.read::(RV32_MEMORY_AS, rs1) }; // we just reinterpret the bytes as a k256 field element. Can't use mem::transmute due to alighment requirements let mut elem = [0u32; 10]; unsafe { std::ptr::copy_nonoverlapping( bytes.as_ptr(), elem.as_mut_ptr() as *mut u8, FIELD10X26_BYTES, ); } let elem = field10x26_k256::FieldElement10x26(elem); let res = elem.sqrt(); if res.is_some().into() { // return 1 followed by the result let bytes: [u8; FIELD10X26_BYTES] = unsafe { // safe to transmute into u8 array std::mem::transmute(res.unwrap().0) }; streams.hint_stream = 1u32 .to_le_bytes() // indicates that a square root exists .into_iter() .chain(bytes) .map(|b| F::from_canonical_u8(b)) .collect(); } else { // Number is not square. // Find the square root of the number times the predefined non-quadratic residue let res = (elem.mul(&K256_NON_QUADRATIC_RESIDUE)).sqrt().unwrap(); let bytes: [u8; FIELD10X26_BYTES] = unsafe { // safe to transmute into u8 array std::mem::transmute(res.0) }; streams.hint_stream = 0u32 .to_le_bytes() // indicate number is not square .into_iter() .chain(bytes) .map(|b| F::from_canonical_u8(b)) .collect(); } Ok(()) } } ================================================ FILE: openvm-riscv/extensions/hints-circuit/src/field10x26_k256.rs ================================================ //! The code here has been mostly copied from the `k256` crate. //! Its the 32-bit implementation of the field element. 
use elliptic_curve::consts::U32; use elliptic_curve::{ subtle::{Choice, ConditionallySelectable, ConstantTimeEq, CtOption}, zeroize::Zeroize, FieldBytesEncoding, }; // use crypto_bigint::U256; use elliptic_curve::bigint::ArrayEncoding; use elliptic_curve::bigint::U256; pub type FieldBytes = elliptic_curve::FieldBytes; /// Order of the secp256k1 elliptic curve in hexadecimal. const ORDER_HEX: &str = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141"; /// Order of the secp256k1 elliptic curve. const ORDER: U256 = U256::from_be_hex(ORDER_HEX); #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, PartialOrd, Ord)] pub struct Secp256k1; impl elliptic_curve::Curve for Secp256k1 { /// 32-byte serialized field elements. type FieldBytesSize = U32; /// 256-bit field modulus. type Uint = U256; /// Curve order. const ORDER: U256 = ORDER; } impl FieldBytesEncoding for U256 { fn decode_field_bytes(field_bytes: &FieldBytes) -> Self { U256::from_be_byte_array(*field_bytes) } fn encode_field_bytes(&self) -> FieldBytes { self.to_be_byte_array() } } impl elliptic_curve::PrimeCurve for Secp256k1 {} // ----------------------------------------------------------------------------------------------------- /// Scalars modulo SECP256k1 modulus (2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1). /// Uses 10 32-bit limbs (little-endian), where in the normalized form /// first 9 contain 26 bits of the value each, and the last one contains 22 bits. /// CurveArithmetic operations can be done without modulo reduction for some time, /// using the remaining overflow bits. #[derive(Clone, Copy, Debug)] pub struct FieldElement10x26(pub(crate) [u32; 10]); // TODO: maybe instead clean this file up and only keep code that is used? #[allow(unused)] impl FieldElement10x26 { /// Zero element. pub const ZERO: Self = Self([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); /// Multiplicative identity. 
    pub const ONE: Self = Self([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

    /// Attempts to parse the given byte array as an SEC1-encoded field element.
    /// Does not check the result for being in the correct range.
    pub(crate) const fn from_bytes_unchecked(bytes: &[u8; 32]) -> Self {
        // Repack 32 big-endian bytes into ten little-endian limbs: the first
        // nine hold 26 bits each, the last holds the remaining 22 bits
        // (see the struct-level doc). Each `wN` gathers bits [26*N, 26*(N+1))
        // of the 256-bit value; byte 31 is the least significant.
        let w0 = (bytes[31] as u32)
            | ((bytes[30] as u32) << 8)
            | ((bytes[29] as u32) << 16)
            | (((bytes[28] & 0x3) as u32) << 24);
        let w1 = (((bytes[28] >> 2) as u32) & 0x3f)
            | ((bytes[27] as u32) << 6)
            | ((bytes[26] as u32) << 14)
            | (((bytes[25] & 0xf) as u32) << 22);
        let w2 = (((bytes[25] >> 4) as u32) & 0xf)
            | ((bytes[24] as u32) << 4)
            | ((bytes[23] as u32) << 12)
            | (((bytes[22] & 0x3f) as u32) << 20);
        let w3 = (((bytes[22] >> 6) as u32) & 0x3)
            | ((bytes[21] as u32) << 2)
            | ((bytes[20] as u32) << 10)
            | ((bytes[19] as u32) << 18);
        let w4 = (bytes[18] as u32)
            | ((bytes[17] as u32) << 8)
            | ((bytes[16] as u32) << 16)
            | (((bytes[15] & 0x3) as u32) << 24);
        let w5 = (((bytes[15] >> 2) as u32) & 0x3f)
            | ((bytes[14] as u32) << 6)
            | ((bytes[13] as u32) << 14)
            | (((bytes[12] & 0xf) as u32) << 22);
        let w6 = (((bytes[12] >> 4) as u32) & 0xf)
            | ((bytes[11] as u32) << 4)
            | ((bytes[10] as u32) << 12)
            | (((bytes[9] & 0x3f) as u32) << 20);
        let w7 = (((bytes[9] >> 6) as u32) & 0x3)
            | ((bytes[8] as u32) << 2)
            | ((bytes[7] as u32) << 10)
            | ((bytes[6] as u32) << 18);
        let w8 = (bytes[5] as u32)
            | ((bytes[4] as u32) << 8)
            | ((bytes[3] as u32) << 16)
            | (((bytes[2] & 0x3) as u32) << 24);
        // The top limb only receives 22 bits, so there is no fourth byte here.
        let w9 = (((bytes[2] >> 2) as u32) & 0x3f)
            | ((bytes[1] as u32) << 6)
            | ((bytes[0] as u32) << 14);
        Self([w0, w1, w2, w3, w4, w5, w6, w7, w8, w9])
    }

    /// Attempts to parse the given byte array as an SEC1-encoded field element.
    ///
    /// Returns None if the byte array does not contain a big-endian integer in the range
    /// [0, p).
pub fn from_bytes(bytes: &FieldBytes) -> CtOption { let res = Self::from_bytes_unchecked(bytes.as_ref()); let overflow = res.get_overflow(); CtOption::new(res, !overflow) } pub const fn from_u64(val: u64) -> Self { let w0 = (val as u32) & 0x3FFFFFF; let val = val >> 26; let w1 = (val as u32) & 0x3FFFFFF; let w2 = (val >> 26) as u32; Self([w0, w1, w2, 0, 0, 0, 0, 0, 0, 0]) } /// Returns the SEC1 encoding of this field element. pub fn to_bytes(self) -> FieldBytes { let mut r = FieldBytes::default(); r[0] = (self.0[9] >> 14) as u8; r[1] = (self.0[9] >> 6) as u8; r[2] = ((self.0[9] as u8 & 0x3Fu8) << 2) | ((self.0[8] >> 24) as u8 & 0x3); r[3] = (self.0[8] >> 16) as u8; r[4] = (self.0[8] >> 8) as u8; r[5] = self.0[8] as u8; r[6] = (self.0[7] >> 18) as u8; r[7] = (self.0[7] >> 10) as u8; r[8] = (self.0[7] >> 2) as u8; r[9] = ((self.0[7] as u8 & 0x3u8) << 6) | ((self.0[6] >> 20) as u8 & 0x3fu8); r[10] = (self.0[6] >> 12) as u8; r[11] = (self.0[6] >> 4) as u8; r[12] = ((self.0[6] as u8 & 0xfu8) << 4) | ((self.0[5] >> 22) as u8 & 0xfu8); r[13] = (self.0[5] >> 14) as u8; r[14] = (self.0[5] >> 6) as u8; r[15] = ((self.0[5] as u8 & 0x3fu8) << 2) | ((self.0[4] >> 24) as u8 & 0x3u8); r[16] = (self.0[4] >> 16) as u8; r[17] = (self.0[4] >> 8) as u8; r[18] = self.0[4] as u8; r[19] = (self.0[3] >> 18) as u8; r[20] = (self.0[3] >> 10) as u8; r[21] = (self.0[3] >> 2) as u8; r[22] = ((self.0[3] as u8 & 0x3u8) << 6) | ((self.0[2] >> 20) as u8 & 0x3fu8); r[23] = (self.0[2] >> 12) as u8; r[24] = (self.0[2] >> 4) as u8; r[25] = ((self.0[2] as u8 & 0xfu8) << 4) | ((self.0[1] >> 22) as u8 & 0xfu8); r[26] = (self.0[1] >> 14) as u8; r[27] = (self.0[1] >> 6) as u8; r[28] = ((self.0[1] as u8 & 0x3fu8) << 2) | ((self.0[0] >> 24) as u8 & 0x3u8); r[29] = (self.0[0] >> 16) as u8; r[30] = (self.0[0] >> 8) as u8; r[31] = self.0[0] as u8; r } /// Adds `x * (2^256 - modulus)`. 
    fn add_modulus_correction(&self, x: u32) -> Self {
        // add (2^256 - modulus) * x to the first limb
        // (0x3D1 is the low part of 2^256 - p = 2^32 + 977).
        let t0 = self.0[0] + x * 0x3D1u32;
        // Propagate excess bits up the limbs
        let t1 = self.0[1] + (x << 6);
        // add `x` times the high bit of correction (2^32)
        let t1 = t1 + (t0 >> 26);
        let t0 = t0 & 0x3FFFFFFu32;
        // Carry-propagate through the remaining limbs, masking each to 26 bits.
        let t2 = self.0[2] + (t1 >> 26);
        let t1 = t1 & 0x3FFFFFFu32;
        let t3 = self.0[3] + (t2 >> 26);
        let t2 = t2 & 0x3FFFFFFu32;
        let t4 = self.0[4] + (t3 >> 26);
        let t3 = t3 & 0x3FFFFFFu32;
        let t5 = self.0[5] + (t4 >> 26);
        let t4 = t4 & 0x3FFFFFFu32;
        let t6 = self.0[6] + (t5 >> 26);
        let t5 = t5 & 0x3FFFFFFu32;
        let t7 = self.0[7] + (t6 >> 26);
        let t6 = t6 & 0x3FFFFFFu32;
        let t8 = self.0[8] + (t7 >> 26);
        let t7 = t7 & 0x3FFFFFFu32;
        let t9 = self.0[9] + (t8 >> 26);
        let t8 = t8 & 0x3FFFFFFu32;
        Self([t0, t1, t2, t3, t4, t5, t6, t7, t8, t9])
    }

    /// Subtracts the overflow in the last limb and return it with the new field element.
    /// Equivalent to subtracting a multiple of 2^256.
    fn subtract_modulus_approximation(&self) -> (Self, u32) {
        // `x` is everything above bit 22 of the top limb, i.e. above bit 256 overall.
        let x = self.0[9] >> 22;
        let t9 = self.0[9] & 0x03FFFFFu32;
        // equivalent to self -= 2^256 * x
        (
            Self([
                self.0[0],
                self.0[1],
                self.0[2],
                self.0[3],
                self.0[4],
                self.0[5],
                self.0[6],
                self.0[7],
                self.0[8],
                t9,
            ]),
            x,
        )
    }

    /// Checks if the field element is greater or equal to the modulus.
    fn get_overflow(&self) -> Choice {
        // `m` is all-ones only when limbs 2..=8 are saturated at 26 bits.
        let m = self.0[2] & self.0[3] & self.0[4] & self.0[5] & self.0[6] & self.0[7] & self.0[8];
        // Overflow if there are bits above 2^256, or the value matches the
        // modulus pattern and the low limbs would carry past it.
        let x = (self.0[9] >> 22 != 0)
            | ((self.0[9] == 0x3FFFFFu32)
                & (m == 0x3FFFFFFu32)
                & ((self.0[1] + 0x40u32 + ((self.0[0] + 0x3D1u32) >> 26)) > 0x3FFFFFFu32));
        Choice::from(x as u8)
    }

    /// Brings the field element's magnitude to 1, but does not necessarily normalize it.
    pub fn normalize_weak(&self) -> Self {
        // Reduce t9 at the start so there will be at most a single carry from the first pass
        let (t, x) = self.subtract_modulus_approximation();
        // The first pass ensures the magnitude is 1, ...
        let res = t.add_modulus_correction(x);
        // ...
        // except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element)
        debug_assert!(res.0[9] >> 23 == 0);
        res
    }

    /// Fully normalizes the field element.
    /// That is, first nine limbs are at most 26 bit large, the last limb is at most 22 bit large,
    /// and the value is less than the modulus.
    pub fn normalize(&self) -> Self {
        let res = self.normalize_weak();
        // At most a single final reduction is needed;
        // check if the value is >= the field characteristic
        let overflow = res.get_overflow();
        // Apply the final reduction (for constant-time behaviour, we do it always)
        let res_corrected = res.add_modulus_correction(1u32);
        // Mask off the possible multiple of 2^256 from the final reduction
        let (res_corrected, x) = res_corrected.subtract_modulus_approximation();
        // If the last limb didn't carry to bit 23 already,
        // then it should have after any final reduction
        debug_assert!(x == (overflow.unwrap_u8() as u32));
        // Constant-time select: keep `res` unless it overflowed the modulus.
        Self::conditional_select(&res, &res_corrected, overflow)
    }

    /// Checks if the field element becomes zero if normalized.
    pub fn normalizes_to_zero(&self) -> Choice {
        let res = self.normalize_weak();
        let t0 = res.0[0];
        let t1 = res.0[1];
        let t2 = res.0[2];
        let t3 = res.0[3];
        let t4 = res.0[4];
        let t5 = res.0[5];
        let t6 = res.0[6];
        let t7 = res.0[7];
        let t8 = res.0[8];
        let t9 = res.0[9];
        // z0 tracks a possible raw value of 0, z1 tracks a possible raw value of the modulus
        let z0 = t0 | t1 | t2 | t3 | t4 | t5 | t6 | t7 | t8 | t9;
        let z1 = (t0 ^ 0x3D0u32)
            & (t1 ^ 0x40u32)
            & t2
            & t3
            & t4
            & t5
            & t6
            & t7
            & t8
            & (t9 ^ 0x3C00000u32);
        Choice::from(((z0 == 0) | (z1 == 0x3FFFFFFu32)) as u8)
    }

    /// Determine if this `FieldElement10x26` is zero.
    ///
    /// # Returns
    ///
    /// If zero, return `Choice(1)`. Otherwise, return `Choice(0)`.
    pub fn is_zero(&self) -> Choice {
        // OR of all raw limbs is zero iff every limb is zero. Note this checks
        // the raw representation, not the normalized value.
        Choice::from(
            ((self.0[0]
                | self.0[1]
                | self.0[2]
                | self.0[3]
                | self.0[4]
                | self.0[5]
                | self.0[6]
                | self.0[7]
                | self.0[8]
                | self.0[9])
                == 0) as u8,
        )
    }

    /// Determine if this `FieldElement10x26` is odd in the SEC1 sense: `self mod 2 == 1`.
    ///
    /// # Returns
    ///
    /// If odd, return `Choice(1)`. Otherwise, return `Choice(0)`.
    pub fn is_odd(&self) -> Choice {
        // Parity is the low bit of the least-significant limb.
        (self.0[0] as u8 & 1).into()
    }

    // The maximum number `m` for which `0x3FFFFFF * 2 * (m + 1) < 2^32`
    pub const fn max_magnitude() -> u32 {
        31u32
    }

    /// Returns -self, treating it as a value of given magnitude.
    /// The provided magnitude must be equal or greater than the actual magnitude of `self`.
    pub const fn negate(&self, magnitude: u32) -> Self {
        let m: u32 = magnitude + 1;
        // Subtract each limb from `2 * m` times the corresponding modulus limb
        // so the per-limb subtraction cannot underflow for the given magnitude.
        let r0 = 0x3FFFC2Fu32 * 2 * m - self.0[0];
        let r1 = 0x3FFFFBFu32 * 2 * m - self.0[1];
        let r2 = 0x3FFFFFFu32 * 2 * m - self.0[2];
        let r3 = 0x3FFFFFFu32 * 2 * m - self.0[3];
        let r4 = 0x3FFFFFFu32 * 2 * m - self.0[4];
        let r5 = 0x3FFFFFFu32 * 2 * m - self.0[5];
        let r6 = 0x3FFFFFFu32 * 2 * m - self.0[6];
        let r7 = 0x3FFFFFFu32 * 2 * m - self.0[7];
        let r8 = 0x3FFFFFFu32 * 2 * m - self.0[8];
        let r9 = 0x03FFFFFu32 * 2 * m - self.0[9];
        Self([r0, r1, r2, r3, r4, r5, r6, r7, r8, r9])
    }

    /// Returns self + rhs mod p.
    /// Sums the magnitudes.
    pub const fn add(&self, rhs: &Self) -> Self {
        // Limb-wise addition with no carry propagation; the magnitude system
        // tracks the available headroom in each 32-bit limb.
        Self([
            self.0[0] + rhs.0[0],
            self.0[1] + rhs.0[1],
            self.0[2] + rhs.0[2],
            self.0[3] + rhs.0[3],
            self.0[4] + rhs.0[4],
            self.0[5] + rhs.0[5],
            self.0[6] + rhs.0[6],
            self.0[7] + rhs.0[7],
            self.0[8] + rhs.0[8],
            self.0[9] + rhs.0[9],
        ])
    }

    /// Multiplies by a single-limb integer.
    /// Multiplies the magnitude by the same value.
// Limb-wise scaling; no reduction is performed, so the caller must ensure the
// resulting magnitude still satisfies `max_magnitude()`.
pub const fn mul_single(&self, rhs: u32) -> Self {
    Self([
        self.0[0] * rhs,
        self.0[1] * rhs,
        self.0[2] * rhs,
        self.0[3] * rhs,
        self.0[4] * rhs,
        self.0[5] * rhs,
        self.0[6] * rhs,
        self.0[7] * rhs,
        self.0[8] * rhs,
        self.0[9] * rhs,
    ])
}

// Core 10x26 schoolbook multiplication with interleaved modular reduction.
// Computes self * rhs mod p and returns a magnitude-1 (not normalized) element.
//
// Statement order is load-bearing: `c` and `d` are 64-bit accumulators for the
// low and high product columns respectively, and each `debug_assert!` documents
// the exact bit-width invariant that makes the next accumulation overflow-free.
// Do not reorder.
#[inline(always)]
fn mul_inner(&self, rhs: &Self) -> Self {
    /* `square()` is just `mul()` with equal arguments. Rust compiler is smart
    enough to do all the necessary optimizations for this case, but it needs to
    have this information inside a function. If a function is just *called* with
    the same arguments, this information cannot be used, so the function must be
    inlined while using the same arguments. Now `mul()` is quite long and
    therefore expensive to inline. So we have an inner (inlined) function, that
    is used inside `mul()` and `square()`, and when it is used with the same
    arguments in `square()`, compiler is able to use that fact after inlining. */

    // 26-bit limb mask, and the two "chunks" of the reduction constant:
    // note (per the identity below) that a carry x out of the top limb folds
    // back in as [x*rr1 x*rr0] over the two lowest limb positions.
    let m = 0x3FFFFFFu64;
    let rr0 = 0x3D10u64;
    let rr1 = 0x400u64;

    // Widen all limbs to u64 once, so every product below is a 64-bit multiply.
    let a0 = self.0[0] as u64;
    let a1 = self.0[1] as u64;
    let a2 = self.0[2] as u64;
    let a3 = self.0[3] as u64;
    let a4 = self.0[4] as u64;
    let a5 = self.0[5] as u64;
    let a6 = self.0[6] as u64;
    let a7 = self.0[7] as u64;
    let a8 = self.0[8] as u64;
    let a9 = self.0[9] as u64;
    let b0 = rhs.0[0] as u64;
    let b1 = rhs.0[1] as u64;
    let b2 = rhs.0[2] as u64;
    let b3 = rhs.0[3] as u64;
    let b4 = rhs.0[4] as u64;
    let b5 = rhs.0[5] as u64;
    let b6 = rhs.0[6] as u64;
    let b7 = rhs.0[7] as u64;
    let b8 = rhs.0[8] as u64;
    let b9 = rhs.0[9] as u64;

    // [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
    // for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
    // for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9)
    // Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*rr1 x*rr0].

    let mut c: u64;
    let mut d: u64;

    // Column 9 (p9) seeds the high accumulator d; t9 is its low 26 bits.
    d = a0 * b9
        + a1 * b8
        + a2 * b7
        + a3 * b6
        + a4 * b5
        + a5 * b4
        + a6 * b3
        + a7 * b2
        + a8 * b1
        + a9 * b0;
    // [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0]
    let t9 = (d & m) as u32;
    d >>= 26;
    debug_assert!(t9 >> 26 == 0);
    debug_assert!(d >> 38 == 0);

    // Column 0: c gets p0; d gets p10; carry u0 out of d is folded into c via rr0/rr1.
    c = a0 * b0;
    debug_assert!(c >> 60 == 0);
    d += a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1;
    debug_assert!(d >> 63 == 0);
    let u0 = (d & m) as u32;
    d >>= 26;
    c += u0 as u64 * rr0;
    debug_assert!(u0 >> 26 == 0);
    debug_assert!(d >> 37 == 0);
    debug_assert!(c >> 61 == 0);
    let t0 = (c & m) as u32;
    c >>= 26;
    c += u0 as u64 * rr1;
    debug_assert!(t0 >> 26 == 0);
    debug_assert!(c >> 37 == 0);

    // Column 1: p1 into c, p11 into d, fold carry u1.
    c += a0 * b1 + a1 * b0;
    debug_assert!(c >> 62 == 0);
    d += a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2;
    debug_assert!(d >> 63 == 0);
    let u1 = (d & m) as u32;
    d >>= 26;
    c += u1 as u64 * rr0;
    debug_assert!(u1 >> 26 == 0);
    debug_assert!(d >> 37 == 0);
    debug_assert!(c >> 63 == 0);
    let t1 = (c & m) as u32;
    c >>= 26;
    c += u1 as u64 * rr1;
    debug_assert!(t1 >> 26 == 0);
    debug_assert!(c >> 38 == 0);

    // Column 2: p2 into c, p12 into d, fold carry u2.
    c += a0 * b2 + a1 * b1 + a2 * b0;
    debug_assert!(c >> 62 == 0);
    d += a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3;
    debug_assert!(d >> 63 == 0);
    let u2 = (d & m) as u32;
    d >>= 26;
    c += u2 as u64 * rr0;
    debug_assert!(u2 >> 26 == 0);
    debug_assert!(d >> 37 == 0);
    debug_assert!(c >> 63 == 0);
    let t2 = (c & m) as u32;
    c >>= 26;
    c += u2 as u64 * rr1;
    debug_assert!(t2 >> 26 == 0);
    debug_assert!(c >> 38 == 0);

    // Column 3: p3 into c, p13 into d, fold carry u3.
    c += a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
    debug_assert!(c >> 63 == 0);
    d += a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4;
    debug_assert!(d >> 63 == 0);
    let u3 = (d & m) as u32;
    d >>= 26;
    c += u3 as u64 * rr0;
    debug_assert!(u3 >> 26 == 0);
    debug_assert!(d >> 37 == 0);
    let t3 = (c & m) as u32;
    c >>= 26;
    c += u3 as u64 * rr1;
    debug_assert!(t3 >> 26 == 0);
    debug_assert!(c >> 39 == 0);

    // Column 4: p4 into c, p14 into d, fold carry u4.
    c += a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
    debug_assert!(c >> 63 == 0);
    d += a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5;
    debug_assert!(d >> 62 == 0);
    let u4 = (d & m) as u32;
    d >>= 26;
    c += u4 as u64 * rr0;
    debug_assert!(u4 >> 26 == 0);
    debug_assert!(d >> 36 == 0);
    let t4 = (c & m) as u32;
    c >>= 26;
    c += u4 as u64 * rr1;
    debug_assert!(t4 >> 26 == 0);
    debug_assert!(c >> 39 == 0);

    // Column 5: p5 into c, p15 into d, fold carry u5.
    c += a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
    debug_assert!(c >> 63 == 0);
    d += a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6;
    debug_assert!(d >> 62 == 0);
    let u5 = (d & m) as u32;
    d >>= 26;
    c += u5 as u64 * rr0;
    debug_assert!(u5 >> 26 == 0);
    debug_assert!(d >> 36 == 0);
    let t5 = (c & m) as u32;
    c >>= 26;
    c += u5 as u64 * rr1;
    debug_assert!(t5 >> 26 == 0);
    debug_assert!(c >> 39 == 0);

    // Column 6: p6 into c, p16 into d, fold carry u6.
    c += a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
    debug_assert!(c >> 63 == 0);
    d += a7 * b9 + a8 * b8 + a9 * b7;
    debug_assert!(d >> 61 == 0);
    let u6 = (d & m) as u32;
    d >>= 26;
    c += u6 as u64 * rr0;
    debug_assert!(u6 >> 26 == 0);
    debug_assert!(d >> 35 == 0);
    let t6 = (c & m) as u32;
    c >>= 26;
    c += u6 as u64 * rr1;
    debug_assert!(t6 >> 26 == 0);
    debug_assert!(c >> 39 == 0);

    // Column 7: p7 into c, p17 into d, fold carry u7.
    // From here the bounds are tight enough that exact constants (not just
    // shift counts) are asserted.
    c += a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
    debug_assert!(c <= 0x8000007C00000007u64);
    d += a8 * b9 + a9 * b8;
    debug_assert!(d >> 58 == 0);
    let u7 = (d & m) as u32;
    d >>= 26;
    c += u7 as u64 * rr0;
    debug_assert!(u7 >> 26 == 0);
    debug_assert!(d >> 32 == 0);
    // d now fits in 32 bits; snapshot it so the remaining u64 math stays bounded.
    let d32 = d as u32;
    debug_assert!(c <= 0x800001703FFFC2F7u64);
    let t7 = (c & m) as u32;
    c >>= 26;
    c += u7 as u64 * rr1;
    debug_assert!(t7 >> 26 == 0);
    debug_assert!(c >> 38 == 0);

    // Column 8: p8 into c, p18 (= a9*b9) into d, fold carry u8 (a local
    // variable that intentionally shadows nothing — `u8` here is a binding,
    // not the primitive type).
    c += a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1 + a8 * b0;
    debug_assert!(c <= 0x9000007B80000008u64);
    d = d32 as u64 + a9 * b9;
    debug_assert!(d >> 57 == 0);
    let u8 = (d & m) as u32;
    d >>= 26;
    c += u8 as u64 * rr0;
    debug_assert!(u8 >> 26 == 0);
    debug_assert!(d >> 31 == 0);
    let d32 = d as u32;
    debug_assert!(c <= 0x9000016FBFFFC2F8u64);

    // Middle limbs 3..=7 are already final.
    let r3 = t3;
    debug_assert!(r3 >> 26 == 0);
    let r4 = t4;
    debug_assert!(r4 >> 26 == 0);
    let r5 = t5;
    debug_assert!(r5 >> 26 == 0);
    let r6 = t6;
    debug_assert!(r6 >> 26 == 0);
    let r7 = t7;
    debug_assert!(r7 >> 26 == 0);

    let r8 = (c & m) as u32;
    c >>= 26;
    c += u8 as u64 * rr1;
    debug_assert!(r8 >> 26 == 0);
    debug_assert!(c >> 39 == 0);

    // Fold the final high carry (d) and t9 back into the top limb; r9 keeps
    // only 22 bits because the top limb of the 10x26 representation is 22 bits.
    c += d32 as u64 * rr0 + t9 as u64;
    debug_assert!(c >> 45 == 0);
    let r9 = (c & (m >> 4)) as u32;
    c >>= 22;
    c += d * (rr1 << 4);
    debug_assert!(r9 >> 22 == 0);
    debug_assert!(c >> 46 == 0);

    // Final reduction pass: propagate what spilled past limb 9 back into
    // limbs 0..=2 (shifted reduction constants rr0>>4 / rr1>>4 account for
    // the 22-bit top limb).
    d = c * (rr0 >> 4) + t0 as u64;
    debug_assert!(d >> 56 == 0);
    let r0 = (d & m) as u32;
    d >>= 26;
    debug_assert!(r0 >> 26 == 0);
    debug_assert!(d >> 30 == 0);
    let d32 = d as u32;
    d = d32 as u64 + c * (rr1 >> 4) + t1 as u64;
    debug_assert!(d >> 53 == 0);
    debug_assert!(d <= 0x10000003FFFFBFu64);
    let r1 = (d & m) as u32;
    d >>= 26;
    debug_assert!(r1 >> 26 == 0);
    debug_assert!(d >> 27 == 0);
    let d32 = d as u32;
    debug_assert!(d <= 0x4000000u64);
    d = d32 as u64 + t2 as u64;
    debug_assert!(d >> 27 == 0);
    // r2 may be up to 27 bits here — the result has magnitude 1, not
    // normalized, exactly as `mul`/`square` document.
    let r2 = d as u32;
    debug_assert!(r2 >> 27 == 0);

    Self([r0, r1, r2, r3, r4, r5, r6, r7, r8, r9])
}

/// Returns self * rhs mod p
/// Brings the magnitude to 1 (but doesn't normalize the result).
/// The magnitudes of arguments should be <= 8.
pub fn mul(&self, rhs: &Self) -> Self {
    self.mul_inner(rhs)
}

/// Returns self * self
/// Brings the magnitude to 1 (but doesn't normalize the result).
/// The magnitudes of arguments should be <= 8.
pub fn square(&self) -> Self {
    // Same-argument call is the whole point of `mul_inner` being #[inline(always)]:
    // the compiler can specialize the squaring case after inlining.
    self.mul_inner(self)
}

// Repeated squaring: returns self^(2^k). k = 0 returns self unchanged.
pub fn pow2k(&self, k: u32) -> Self {
    let mut x = *self;
    for _j in 0..k {
        x = x.square();
    }
    x
}

/// Returns the multiplicative inverse of self, if self is non-zero.
/// The result has magnitude 1, but is not normalized.
// Fixed square-and-multiply addition chain (x2, x3, x6, ... name self^(2^n - 1)
// for n = 2, 3, 6, ...). NOTE(review): presumably Fermat inversion (exponent
// p - 2) as in upstream k256 — confirm against the upstream chain. For zero
// input the result is zero, not an error; callers must handle that case.
pub fn invert(&self) -> Self {
    let x2 = self.pow2k(1).mul(self);
    let x3 = x2.pow2k(1).mul(self);
    let x6 = x3.pow2k(3).mul(&x3);
    let x9 = x6.pow2k(3).mul(&x3);
    let x11 = x9.pow2k(2).mul(&x2);
    let x22 = x11.pow2k(11).mul(&x11);
    let x44 = x22.pow2k(22).mul(&x22);
    let x88 = x44.pow2k(44).mul(&x44);
    let x176 = x88.pow2k(88).mul(&x88);
    let x220 = x176.pow2k(44).mul(&x44);
    let x223 = x220.pow2k(3).mul(&x3);
    // The final result is then assembled using a sliding window over the blocks.
    x223.pow2k(23)
        .mul(&x22)
        .pow2k(5)
        .mul(self)
        .pow2k(3)
        .mul(&x2)
        .pow2k(2)
        .mul(self)
}

/// Returns the square root of self mod p, or `None` if no square root exists.
/// The result has magnitude 1, but is not normalized.
// NOTE(review): the return type reads `CtOption` here; the extraction that
// produced this file appears to have stripped angle-bracketed generics
// (upstream k256 returns `CtOption<Self>`). Same prefix chain as `invert`,
// then the candidate root is verified by checking res^2 - self ≡ 0.
pub fn sqrt(&self) -> CtOption {
    let x2 = self.pow2k(1).mul(self);
    let x3 = x2.pow2k(1).mul(self);
    let x6 = x3.pow2k(3).mul(&x3);
    let x9 = x6.pow2k(3).mul(&x3);
    let x11 = x9.pow2k(2).mul(&x2);
    let x22 = x11.pow2k(11).mul(&x11);
    let x44 = x22.pow2k(22).mul(&x22);
    let x88 = x44.pow2k(44).mul(&x44);
    let x176 = x88.pow2k(88).mul(&x88);
    let x220 = x176.pow2k(44).mul(&x44);
    let x223 = x220.pow2k(3).mul(&x3);
    // The final result is then assembled using a sliding window over the blocks.
    let res = x223.pow2k(23).mul(&x22).pow2k(6).mul(&x2).pow2k(2);
    // Constant-time check that res is actually a root: res^2 + (-self) must
    // normalize to zero.
    let is_root = (res.mul(&res).negate(1).add(self)).normalizes_to_zero();
    // Only return Some if it's the square root.
    CtOption::new(res, is_root)
}
}

impl Default for FieldElement10x26 {
    fn default() -> Self {
        Self::ZERO
    }
}

// Constant-time limb-wise select between two elements, driven by `choice`.
impl ConditionallySelectable for FieldElement10x26 {
    #[inline(always)]
    fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
        Self([
            u32::conditional_select(&a.0[0], &b.0[0], choice),
            u32::conditional_select(&a.0[1], &b.0[1], choice),
            u32::conditional_select(&a.0[2], &b.0[2], choice),
            u32::conditional_select(&a.0[3], &b.0[3], choice),
            u32::conditional_select(&a.0[4], &b.0[4], choice),
            u32::conditional_select(&a.0[5], &b.0[5], choice),
            u32::conditional_select(&a.0[6], &b.0[6], choice),
            u32::conditional_select(&a.0[7], &b.0[7], choice),
            u32::conditional_select(&a.0[8], &b.0[8], choice),
            u32::conditional_select(&a.0[9], &b.0[9], choice),
        ])
    }
}

// Constant-time equality of the raw limb representation. NOTE(review): this
// compares representations, not field values — two congruent but differently
// normalized elements compare unequal; confirm callers normalize first.
impl ConstantTimeEq for FieldElement10x26 {
    fn ct_eq(&self, other: &Self) -> Choice {
        self.0[0].ct_eq(&other.0[0])
            & self.0[1].ct_eq(&other.0[1])
            & self.0[2].ct_eq(&other.0[2])
            & self.0[3].ct_eq(&other.0[3])
            & self.0[4].ct_eq(&other.0[4])
            & self.0[5].ct_eq(&other.0[5])
            & self.0[6].ct_eq(&other.0[6])
            & self.0[7].ct_eq(&other.0[7])
            & self.0[8].ct_eq(&other.0[8])
            & self.0[9].ct_eq(&other.0[9])
    }
}

// Securely wipe the limbs when the element is dropped from sensitive contexts.
impl Zeroize for FieldElement10x26 {
    fn zeroize(&mut self) {
        self.0.zeroize();
    }
}

================================================ FILE: openvm-riscv/extensions/hints-circuit/src/lib.rs ================================================
#![cfg_attr(feature = "tco", allow(internal_features))]
#![cfg_attr(feature = "tco", allow(incomplete_features))]
#![cfg_attr(feature = "tco", feature(explicit_tail_calls))]
#![cfg_attr(feature = "tco", feature(core_intrinsics))]

// NOTE(review): throughout this file the extraction appears to have stripped
// angle-bracketed generic parameters (e.g. `PhantomExecutor` is presumably
// `PhantomExecutor<F>` upstream); the tokens below are reproduced as found.
use openvm_circuit::arch::{
    AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutorInventoryBuilder,
    ExecutorInventoryError, VmCircuitExtension, VmExecutionExtension, VmProverExtension,
};
use openvm_circuit::derive::{
    AnyEnum, AotExecutor, AotMeteredExecutor, Executor, MeteredExecutor, PreflightExecutor,
};
use openvm_circuit::system::phantom::PhantomExecutor;
use openvm_instructions::PhantomDiscriminant;
use openvm_stark_backend::config::{StarkGenericConfig, Val};
use openvm_stark_backend::p3_field::{Field, PrimeField32};
use openvm_stark_sdk::engine::StarkEngine;
use powdr_openvm_riscv_hints_transpiler::HintsPhantom;
use serde::{Deserialize, Serialize};

// this module is mostly copy/pasted code from k256 for the field element representation in 32-bit architectures
mod executors;
mod field10x26_k256;

/// OpenVM extension with miscellaneous hint implementations.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct HintsExtension;

// Single executor variant: all hints are phantom (non-constrained) instructions.
#[derive(
    AnyEnum, PreflightExecutor, Executor, MeteredExecutor, AotExecutor, AotMeteredExecutor, Clone,
)]
pub enum HintsExtensionExecutor {
    Phantom(PhantomExecutor),
}

impl VmExecutionExtension for HintsExtension {
    type Executor = HintsExtensionExecutor;

    // Registers one phantom sub-executor per hint, keyed by the discriminants
    // declared in `HintsPhantom` (shared with the transpiler crate).
    fn extend_execution(
        &self,
        inventory: &mut ExecutorInventoryBuilder,
    ) -> Result<(), ExecutorInventoryError> {
        inventory.add_phantom_sub_executor(
            executors::ReverseBytesSubEx,
            PhantomDiscriminant(HintsPhantom::HintReverseBytes as u16),
        )?;
        inventory.add_phantom_sub_executor(
            executors::K256InverseFieldSubEx,
            PhantomDiscriminant(HintsPhantom::HintK256InverseField as u16),
        )?;
        inventory.add_phantom_sub_executor(
            executors::K256InverseField10x26SubEx,
            PhantomDiscriminant(HintsPhantom::HintK256InverseField10x26 as u16),
        )?;
        inventory.add_phantom_sub_executor(
            executors::K256SqrtField10x26SubEx,
            PhantomDiscriminant(HintsPhantom::HintK256SqrtField10x26 as u16),
        )?;
        Ok(())
    }
}

// Hints impose no AIR constraints, so the circuit side is a no-op.
impl VmCircuitExtension for HintsExtension {
    fn extend_circuit(&self, _: &mut AirInventory) -> Result<(), AirInventoryError> {
        Ok(())
    }
}

pub struct HintsProverExt;

impl VmProverExtension for HintsProverExt
where
    E: StarkEngine,
    Val: PrimeField32,
{
    fn extend_prover(
        &self,
        _: &HintsExtension,
        _: &mut ChipInventory,
    ) -> Result<(), ChipInventoryError> {
        // No chips to add for hints
        Ok(())
    }
}
================================================ FILE: openvm-riscv/extensions/hints-guest/Cargo.toml ================================================
[package]
name = "powdr-openvm-riscv-hints-guest"
version.workspace = true
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true

# zkvm-only dependencies: the custom-instruction macros are meaningless off-target.
[target.'cfg(target_os = "zkvm")'.dependencies]
openvm-platform = { workspace = true, features = ["rust-runtime"] }
openvm-rv32im-guest.workspace = true
openvm-custom-insn.workspace = true

[dependencies]
strum_macros = "0.27"

================================================ FILE: openvm-riscv/extensions/hints-guest/src/lib.rs ================================================
#![no_std]

#[cfg(target_os = "zkvm")]
use openvm_custom_insn; // needed for the hint_store_u32 macro
use strum_macros::FromRepr;

/// This is custom-2 defined in RISC-V spec document
pub const OPCODE: u8 = 0x5b;
pub const HINTS_FUNCT3: u8 = 0b000;

// funct7 values selecting the individual hints; shared with the transpiler,
// which maps them to `HintsPhantom` discriminants.
#[derive(Debug, Copy, Clone, PartialEq, Eq, FromRepr)]
#[repr(u8)]
pub enum HintsFunct7 {
    ReverseBytes = 0,
    K256InverseField,
    K256InverseField10x26,
    K256SqrtField10x26,
}

// Each insn_* helper emits the raw custom-2 R-type instruction for one hint;
// rs1 carries the pointer to the operand bytes, rd/rs2 are unused (x0).
#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_reverse_bytes(bytes: *const u8) {
    openvm_platform::custom_insn_r!(
        opcode = OPCODE,
        funct3 = HINTS_FUNCT3,
        funct7 = HintsFunct7::ReverseBytes as u8,
        rd = Const "x0",
        rs1 = In bytes,
        rs2 = Const "x0"
    );
}

#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_k256_inverse_field(bytes: *const u8) {
    openvm_platform::custom_insn_r!(
        opcode = OPCODE,
        funct3 = HINTS_FUNCT3,
        funct7 = HintsFunct7::K256InverseField as u8,
        rd = Const "x0",
        rs1 = In bytes,
        rs2 = Const "x0"
    );
}

#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_k256_inverse_field_10x26(bytes: *const u8) {
    openvm_platform::custom_insn_r!(
        opcode = OPCODE,
        funct3 = HINTS_FUNCT3,
        funct7 = HintsFunct7::K256InverseField10x26 as u8,
        rd = Const "x0",
        rs1 = In bytes,
        rs2 = Const "x0",
    );
}

#[cfg(target_os = "zkvm")]
#[inline(always)]
fn insn_k256_sqrt_field_10x26(bytes: *const u8) {
    openvm_platform::custom_insn_r!(
        opcode = OPCODE,
        funct3 = HINTS_FUNCT3,
        funct7 = HintsFunct7::K256SqrtField10x26 as u8,
        rd = Const "x0",
        rs1 = In bytes,
        rs2 = Const "x0",
    );
}

/// Just an example hint that reverses the bytes of a u32 value.
// On zkvm: issue the hint instruction, then read the hinted u32 back from the
// hint stream. Off zkvm: compute the byte swap directly so the function is
// testable on the host.
// NOTE(review): `MaybeUninit::::uninit()` below looks like extraction damage —
// presumably `MaybeUninit::<u32>::uninit()` upstream; reproduced as found.
pub fn hint_reverse_bytes(val: u32) -> u32 {
    #[cfg(target_os = "zkvm")]
    {
        let result = core::mem::MaybeUninit::::uninit();
        insn_reverse_bytes(&val as *const u32 as *const u8);
        unsafe {
            openvm_rv32im_guest::hint_store_u32!(result.as_ptr() as *const u32);
            result.assume_init()
        }
    }
    #[cfg(not(target_os = "zkvm"))]
    {
        ((val & 0x000000FF) << 24)
            | ((val & 0x0000FF00) << 8)
            | ((val & 0x00FF0000) >> 8)
            | ((val & 0xFF000000) >> 24)
    }
}

/// Inverse of field element in SECP256k1 modulus (if not zero).
/// The caller is responsible for handling the zero input case, and the returned value is zero in that case.
#[cfg(target_os = "zkvm")]
pub fn hint_k256_inverse_field(sec1_bytes: &[u8]) -> [u8; 32] {
    insn_k256_inverse_field(sec1_bytes.as_ptr() as *const u8);
    let inverse = core::mem::MaybeUninit::<[u8; 32]>::uninit();
    unsafe {
        // 8 u32 words = 32 bytes of hinted inverse.
        openvm_rv32im_guest::hint_buffer_u32!(inverse.as_ptr() as *const u8, 8);
        inverse.assume_init()
    }
}

/// Ensures that the 10 limbs are weakly normalized (i.e., the most significant limb is 22 bits and the others are 26 bits).
/// For an honest prover, this is a no-op.
// Defensive masking: the hint stream is prover-supplied, so clamp each limb to
// its representation bound rather than trusting it.
#[cfg(target_os = "zkvm")]
fn ensure_weakly_normalized_10x26(limbs: [u32; 10]) -> [u32; 10] {
    [
        limbs[0] & 0x3ffffff,
        limbs[1] & 0x3ffffff,
        limbs[2] & 0x3ffffff,
        limbs[3] & 0x3ffffff,
        limbs[4] & 0x3ffffff,
        limbs[5] & 0x3ffffff,
        limbs[6] & 0x3ffffff,
        limbs[7] & 0x3ffffff,
        limbs[8] & 0x3ffffff,
        // Top limb is only 22 bits wide in the 10x26 representation.
        limbs[9] & 0x3fffff,
    ]
}

/// Inverse of field element in SECP256k1 modulus (if not zero).
/// Takes in the raw 32-bit architecture representation of the field element from k256 (`FieldElement10x26`).
/// It is guaranteed to be weakly normalized, i.e., the most significant limb is 22 bits and the other
/// limbs are 26 bits long.
/// The caller is responsible for handling the zero input case, and the returned value is undefined in that case.
#[cfg(target_os = "zkvm")]
pub fn hint_k256_inverse_field_10x26(elem: [u32; 10]) -> [u32; 10] {
    insn_k256_inverse_field_10x26(elem.as_ptr() as *const u8);
    let inverse = core::mem::MaybeUninit::<[u32; 10]>::uninit();
    let inverse = unsafe {
        openvm_rv32im_guest::hint_buffer_u32!(inverse.as_ptr() as *const u8, 10);
        inverse.assume_init()
    };
    ensure_weakly_normalized_10x26(inverse)
}

/// Pre-defined non-quadratic residue for k256.
/// The guest should use this value to prove the non-square case.
pub const K256_NON_QUADRATIC_RESIDUE: [u32; 10] = [3, 0, 0, 0, 0, 0, 0, 0, 0, 0];

/// If the input is square, returns true and the square root in the same representation.
/// It is guaranteed to be weakly normalized, i.e., the most significant limb is 22 bits and the other
/// limbs are 26 bits long.
/// If the input is non-square, returns false and the square root of the element times a pre-defined non-quadratic residue.
#[cfg(target_os = "zkvm")]
pub fn hint_k256_sqrt_field_10x26(elem: [u32; 10]) -> (bool, [u32; 10]) {
    insn_k256_sqrt_field_10x26(elem.as_ptr() as *const u8);
    // read the "boolean" result
    let has_sqrt = unsafe {
        let has_sqrt = core::mem::MaybeUninit::::uninit();
        openvm_rv32im_guest::hint_store_u32!(has_sqrt.as_ptr() as *const u32);
        has_sqrt.assume_init() != 0
    };
    // read the square root value
    let sqrt = unsafe {
        let sqrt = core::mem::MaybeUninit::<[u32; 10]>::uninit();
        openvm_rv32im_guest::hint_buffer_u32!(sqrt.as_ptr() as *const u8, 10);
        sqrt.assume_init()
    };
    // Never trust the hinted limbs: weakly normalize before returning.
    let sqrt = ensure_weakly_normalized_10x26(sqrt);
    (has_sqrt, sqrt)
}

================================================ FILE: openvm-riscv/extensions/hints-transpiler/Cargo.toml ================================================
[package]
name = "powdr-openvm-riscv-hints-transpiler"
version.workspace = true
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true

[dependencies]
openvm-stark-backend = { workspace = true }
openvm-instructions = { workspace = true }
openvm-transpiler = { workspace = true }
openvm-instructions-derive = { workspace = true }
rrs-lib = "0.1.0"
strum = { version = "0.27", features = ["derive"] }
powdr-openvm-riscv-hints-guest = { workspace = true }

================================================ FILE: openvm-riscv/extensions/hints-transpiler/src/lib.rs ================================================
use openvm_instructions::{
    instruction::Instruction, riscv::RV32_REGISTER_NUM_LIMBS, LocalOpcode, PhantomDiscriminant,
};
use openvm_instructions_derive::LocalOpcode;
use openvm_stark_backend::p3_field::PrimeField32;
use openvm_transpiler::{TranspilerExtension, TranspilerOutput};
use powdr_openvm_riscv_hints_guest::{HintsFunct7, HINTS_FUNCT3, OPCODE};
use rrs_lib::instruction_formats::RType;
use strum::{EnumCount, EnumIter, FromRepr};

#[derive(
    Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, EnumCount, EnumIter, FromRepr, LocalOpcode,
)]
#[opcode_offset = 0x800]
#[repr(usize)]
pub enum HintsOpcode {
    HINTS,
}

#[derive(Copy, Clone, Debug, PartialEq, Eq, FromRepr)]
#[repr(u16)]
pub enum HintsPhantom {
    // NOTE(review): there is no registry guaranteeing these phantom
    // discriminants are conflict-free; 0x60.. was chosen by inspecting the
    // OpenVM code for the next range that did not appear to be in use.
    // Verify against upstream before adding new discriminants here.
    HintReverseBytes = 0x60,
    HintK256InverseField = 0x61,
    HintK256InverseField10x26 = 0x62,
    HintK256SqrtField10x26 = 0x63,
}

#[derive(Default)]
pub struct HintsTranspilerExtension;

// NOTE(review): `Option>` and the bare `F::` below look like extraction damage
// (presumably `Option<TranspilerOutput<F>>` with `impl<F: PrimeField32> ...`
// upstream); tokens reproduced as found.
impl TranspilerExtension for HintsTranspilerExtension {
    // Decodes one custom-2 hint instruction (if the next word matches our
    // opcode/funct3/funct7) into the corresponding phantom instruction whose
    // operand is the rs1 register's byte offset in the register file.
    fn process_custom(&self, instruction_stream: &[u32]) -> Option> {
        if instruction_stream.is_empty() {
            return None;
        }
        let instruction_u32 = instruction_stream[0];
        // Low 7 bits are the RISC-V major opcode.
        let opcode = (instruction_u32 & 0x7f) as u8;
        if opcode != OPCODE {
            return None;
        }
        let insn = RType::new(instruction_u32);
        if insn.funct3 as u8 != HINTS_FUNCT3 {
            return None;
        }
        // Unknown funct7 values bail out (None) rather than panic.
        let funct7 = HintsFunct7::from_repr(insn.funct7 as u8)?;
        let disc = match funct7 {
            HintsFunct7::ReverseBytes => HintsPhantom::HintReverseBytes,
            HintsFunct7::K256InverseField => HintsPhantom::HintK256InverseField,
            HintsFunct7::K256InverseField10x26 => HintsPhantom::HintK256InverseField10x26,
            HintsFunct7::K256SqrtField10x26 => HintsPhantom::HintK256SqrtField10x26,
        };
        let instruction = Instruction::phantom(
            PhantomDiscriminant(disc as u16),
            F::from_canonical_usize(RV32_REGISTER_NUM_LIMBS * insn.rs1),
            F::ZERO,
            0,
        );
        Some(TranspilerOutput::one_to_one(instruction))
    }
}

================================================ FILE: openvm-riscv/guest/Cargo.toml ================================================
[workspace]

[package]
name = "powdr-openvm-guest-stdin-test"
version = "0.0.0"
edition = "2021"

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }

[profile.release-with-debug]
inherits = "release"
debug = true

================================================ FILE: openvm-riscv/guest/src/main.rs ================================================
#![cfg_attr(target_os = "zkvm", no_main)]
#![cfg_attr(target_os = "zkvm", no_std)]

openvm::entry!(main);

use openvm::io::{read, reveal_u32};

// Test guest: reads n from stdin, iterates a Fibonacci-style recurrence n-1
// times, and reveals the result as public output.
pub fn main() {
    let n: u32 = read();
    let mut a: u32 = 0;
    let mut b: u32 = 1;
    for _ in 1..n {
        let sum = a + b;
        a = b;
        b = sum;
    }
    // Guard against trivially-empty runs (n <= 1 leaves a == 0): panicking here
    // makes a mis-fed input visible instead of revealing a meaningless 0.
    if a == 0 {
        panic!();
    }
    reveal_u32(a, 0);
}

================================================ FILE: openvm-riscv/guest-ecc-manual/Cargo.toml ================================================
[workspace]

[package]
name = "openvm-ecc-test-programs"
version = "0.0.0"
edition = "2021"

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [
    "std",
] }
openvm-ecc-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", subdirectory = "extensions/ecc/guest", default-features = false }
openvm-algebra-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", subdirectory = "extensions/algebra/guest", default-features = false }
openvm-k256 = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", subdirectory = "guest-libs/k256", package = "k256", features = [
    "ecdsa",
] }
hex-literal = { version = "0.4.1", default-features = false }

================================================ FILE: openvm-riscv/guest-ecc-manual/openvm.toml ================================================
[app_vm_config.rv32i]
[app_vm_config.rv32m]
[app_vm_config.io]
# The two moduli below are the secp256k1 base-field prime and group order.
[app_vm_config.modular]
supported_moduli = [
    "115792089237316195423570985008687907853269984665640564039457584007908834671663",
    "115792089237316195423570985008687907852837564279074904382605163141518161494337",
]
[[app_vm_config.ecc.supported_curves]]
struct_name = "Secp256k1Point"
modulus = "115792089237316195423570985008687907853269984665640564039457584007908834671663"
scalar = "115792089237316195423570985008687907852837564279074904382605163141518161494337"
a = "0"
b = "7"

================================================ FILE:
openvm-riscv/guest-ecc-manual/src/main.rs ================================================
use hex_literal::hex;
use openvm::io::read;
use openvm_algebra_guest::IntMod;
use openvm_ecc_guest::{weierstrass::IntrinsicCurve, weierstrass::WeierstrassPoint};
use openvm_k256::{Secp256k1, Secp256k1Coord, Secp256k1Point, Secp256k1Scalar};

openvm::init!();
openvm::entry!(main);

// secp256k1 MSM benchmark guest using OpenVM's intrinsic curve ops: checks a
// fixed 2-term multi-scalar multiplication against known coordinates, then
// repeats the MSM n times (n read from input) for benchmarking.
// NOTE(review): `::msm(...)` below looks like extraction damage — presumably a
// `<Secp256k1 as IntrinsicCurve>::msm(...)` qualified call upstream; tokens
// reproduced as found. The coordinate/result byte arrays are big-endian.
pub fn main() {
    let x1 = Secp256k1Coord::from_be_bytes(&[
        177, 205, 72, 85, 29, 179, 168, 198, 125, 68, 123, 98, 49, 165, 115, 23, 117, 100, 184,
        12, 125, 99, 103, 18, 245, 130, 15, 91, 76, 105, 85, 20,
    ])
    .expect("");
    let y1 = Secp256k1Coord::from_be_bytes(&[
        219, 130, 184, 163, 86, 144, 60, 160, 181, 38, 124, 67, 141, 79, 174, 63, 60, 188, 208,
        206, 139, 94, 72, 251, 222, 58, 13, 159, 189, 75, 97, 12,
    ])
    .expect("");
    let x2 = Secp256k1Coord::from_be_bytes(&[
        146, 161, 155, 83, 76, 248, 129, 31, 87, 66, 55, 228, 112, 251, 3, 121, 113, 60, 97, 168,
        52, 94, 83, 10, 224, 229, 14, 231, 182, 207, 33, 28,
    ])
    .expect("");
    let y2 = Secp256k1Coord::from_be_bytes(&[
        163, 84, 112, 69, 78, 54, 106, 228, 95, 24, 73, 7, 216, 178, 14, 141, 200, 150, 92, 72,
        29, 246, 91, 179, 165, 11, 29, 36, 68, 96, 135, 19,
    ])
    .expect("");
    let p1 = Secp256k1Point::from_xy(x1, y1).unwrap();
    let p2 = Secp256k1Point::from_xy(x2, y2).unwrap();
    let scalar_1 = Secp256k1Scalar::from_be_bytes(&hex!(
        "BFD5D7FA526B6954945C980C6C804E0E19840F2DA009C8B0C9A511189FB466BF"
    ))
    .expect("");
    let scalar_2 = Secp256k1Scalar::from_be_bytes(&hex!(
        "369E07A2FC32462DD74AB67CE7D7595EC91FC11CC90A3C15A94B57A21E878614"
    ))
    .expect("");
    // Expected coordinates of scalar_1 * p1 + scalar_2 * p2.
    let result_x = Secp256k1Coord::from_be_bytes(&[
        112, 170, 75, 207, 229, 212, 237, 2, 131, 65, 143, 232, 168, 46, 48, 240, 56, 164, 245,
        167, 23, 29, 43, 132, 130, 181, 145, 207, 3, 49, 25, 48,
    ])
    .expect("");
    let result_y = Secp256k1Coord::from_be_bytes(&[
        225, 222, 233, 182, 14, 157, 47, 22, 177, 249, 107, 145, 57, 77, 133, 68, 6, 102, 101,
        78, 5, 249, 10, 81, 202, 112, 204, 76, 117, 7, 231, 160,
    ])
    .expect("");
    let mut result = ::msm(&[scalar_1, scalar_2], &[p1, p2]);
    assert_eq!(result.x(), &result_x);
    assert_eq!(result.y(), &result_y);
    // Benchmark
    let n: u32 = read();
    for _ in 0..n {
        result = ::msm(&[scalar_1, scalar_2], &[result, result]);
    }
}

================================================ FILE: openvm-riscv/guest-ecc-powdr-affine-hint/Cargo.toml ================================================
[workspace]

[package]
name = "openvm-ecc-powdr-affine-hint"
version = "0.0.0"
edition = "2021"

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [
    "std",
] }
# powdr fork of k256 exposing field internals and the PowdrAffinePoint lincomb.
k256 = { git = "https://github.com/powdr-labs/elliptic-curves-k256", rev = "a48ad5c", default-features = false, features = [
    "expose-field",
    "arithmetic",
] }
hex-literal = "1.0.0"

================================================ FILE: openvm-riscv/guest-ecc-powdr-affine-hint/src/main.rs ================================================
use hex_literal::hex;
use k256::elliptic_curve::sec1::FromEncodedPoint;
use k256::elliptic_curve::PrimeField;
use k256::PowdrAffinePoint;
use k256::{AffinePoint, EncodedPoint, FieldBytes, FieldElement, Scalar};
use openvm::io::read;

openvm::entry!(main);

// Same MSM benchmark as guest-ecc-manual, but driven through the powdr-forked
// k256 `PowdrAffinePoint::lincomb` (affine + hint based) instead of OpenVM's
// intrinsic curve ops; the test vectors (points, scalars, expected result)
// are identical.
pub fn main() {
    let x1 = &FieldBytes::from_slice(&[
        177, 205, 72, 85, 29, 179, 168, 198, 125, 68, 123, 98, 49, 165, 115, 23, 117, 100, 184,
        12, 125, 99, 103, 18, 245, 130, 15, 91, 76, 105, 85, 20,
    ]);
    let y1 = &FieldBytes::from_slice(&[
        219, 130, 184, 163, 86, 144, 60, 160, 181, 38, 124, 67, 141, 79, 174, 63, 60, 188, 208,
        206, 139, 94, 72, 251, 222, 58, 13, 159, 189, 75, 97, 12,
    ]);
    let x2 = &FieldBytes::from_slice(&[
        146, 161, 155, 83, 76, 248, 129, 31, 87, 66, 55, 228, 112, 251, 3, 121, 113, 60, 97, 168,
        52, 94, 83, 10, 224, 229, 14, 231, 182, 207, 33, 28,
    ]);
    let y2 = &FieldBytes::from_slice(&[
        163, 84, 112, 69, 78, 54, 106, 228, 95, 24, 73, 7, 216, 178, 14, 141, 200, 150, 92, 72,
        29, 246, 91, 179, 165, 11, 29, 36, 68, 96, 135, 19,
    ]);
    let point1 = PowdrAffinePoint(
        AffinePoint::from_encoded_point(&EncodedPoint::from_affine_coordinates(x1, y1, false))
            .expect("AffinePoint should be valid"),
    );
    let point2 = PowdrAffinePoint(
        AffinePoint::from_encoded_point(&EncodedPoint::from_affine_coordinates(x2, y2, false))
            .expect("AffinePoint should be valid"),
    );
    // Expected result coordinates, normalized for comparison below.
    let result_x: FieldElement = FieldElement::from_bytes(FieldBytes::from_slice(&[
        112, 170, 75, 207, 229, 212, 237, 2, 131, 65, 143, 232, 168, 46, 48, 240, 56, 164, 245,
        167, 23, 29, 43, 132, 130, 181, 145, 207, 3, 49, 25, 48,
    ]))
    .unwrap()
    .normalize();
    let result_y: FieldElement = FieldElement::from_bytes(FieldBytes::from_slice(&[
        225, 222, 233, 182, 14, 157, 47, 22, 177, 249, 107, 145, 57, 77, 133, 68, 6, 102, 101,
        78, 5, 249, 10, 81, 202, 112, 204, 76, 117, 7, 231, 160,
    ]))
    .unwrap()
    .normalize();
    let scalar_1 = Scalar::from_repr(*FieldBytes::from_slice(&hex!(
        "BFD5D7FA526B6954945C980C6C804E0E19840F2DA009C8B0C9A511189FB466BF"
    )))
    .unwrap();
    let scalar_2 = Scalar::from_repr(*FieldBytes::from_slice(&hex!(
        "369E07A2FC32462DD74AB67CE7D7595EC91FC11CC90A3C15A94B57A21E878614"
    )))
    .unwrap();
    // Multi scalar multiplication
    let mut result = PowdrAffinePoint::lincomb(&[(point1, scalar_1), (point2, scalar_2)]);
    // Field elements are compared in normalized form (see FieldElement10x26:
    // arithmetic results are magnitude-1 but not normalized).
    assert_eq!(result.x().normalize(), result_x);
    assert_eq!(result.y().normalize(), result_y);
    // Benchmark
    let n: u32 = read();
    for _ in 0..n {
        result =
            PowdrAffinePoint::lincomb(&[(result.clone(), scalar_1), (result.clone(), scalar_2)]);
    }
}

================================================ FILE: openvm-riscv/guest-ecc-projective/Cargo.toml ================================================
[workspace]

[package]
name = "openvm-ecc-test-programs"
version = "0.0.0"
edition = "2021"

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [
    "std",
] }
# Upstream (non-forked) k256 — the projective-arithmetic baseline variant.
k256 = { version = "0.13", default-features = false, features = ["arithmetic"] }
hex-literal = "1.0.0"

================================================ FILE:
openvm-riscv/guest-ecc-projective/src/main.rs ================================================ use hex_literal::hex; use k256::elliptic_curve::ops::LinearCombination; use k256::elliptic_curve::sec1::FromEncodedPoint; use k256::elliptic_curve::PrimeField; use k256::{AffinePoint, EncodedPoint, FieldBytes, ProjectivePoint, Scalar}; use openvm::io::read; openvm::entry!(main); pub fn main() { let x1 = FieldBytes::from_slice(&[ 177, 205, 72, 85, 29, 179, 168, 198, 125, 68, 123, 98, 49, 165, 115, 23, 117, 100, 184, 12, 125, 99, 103, 18, 245, 130, 15, 91, 76, 105, 85, 20, ]); let y1 = FieldBytes::from_slice(&[ 219, 130, 184, 163, 86, 144, 60, 160, 181, 38, 124, 67, 141, 79, 174, 63, 60, 188, 208, 206, 139, 94, 72, 251, 222, 58, 13, 159, 189, 75, 97, 12, ]); let x2 = FieldBytes::from_slice(&[ 146, 161, 155, 83, 76, 248, 129, 31, 87, 66, 55, 228, 112, 251, 3, 121, 113, 60, 97, 168, 52, 94, 83, 10, 224, 229, 14, 231, 182, 207, 33, 28, ]); let y2 = FieldBytes::from_slice(&[ 163, 84, 112, 69, 78, 54, 106, 228, 95, 24, 73, 7, 216, 178, 14, 141, 200, 150, 92, 72, 29, 246, 91, 179, 165, 11, 29, 36, 68, 96, 135, 19, ]); let point1 = AffinePoint::from_encoded_point(&EncodedPoint::from_affine_coordinates(x1, y1, false)) .expect("AffinePoint should be valid"); let point2 = AffinePoint::from_encoded_point(&EncodedPoint::from_affine_coordinates(x2, y2, false)) .expect("AffinePoint should be valid"); let a = ProjectivePoint::from(point1); let b = ProjectivePoint::from(point2); let scalar_1 = Scalar::from_repr(*FieldBytes::from_slice( hex!("BFD5D7FA526B6954945C980C6C804E0E19840F2DA009C8B0C9A511189FB466BF").as_ref(), )) .unwrap(); let scalar_2 = Scalar::from_repr(*FieldBytes::from_slice( hex!("369E07A2FC32462DD74AB67CE7D7595EC91FC11CC90A3C15A94B57A21E878614").as_ref(), )) .unwrap(); let result_x = FieldBytes::from_slice(&[ 112, 170, 75, 207, 229, 212, 237, 2, 131, 65, 143, 232, 168, 46, 48, 240, 56, 164, 245, 167, 23, 29, 43, 132, 130, 181, 145, 207, 3, 49, 25, 48, ]); let result_y = 
FieldBytes::from_slice(&[ 225, 222, 233, 182, 14, 157, 47, 22, 177, 249, 107, 145, 57, 77, 133, 68, 6, 102, 101, 78, 5, 249, 10, 81, 202, 112, 204, 76, 117, 7, 231, 160, ]); let result_point = AffinePoint::from_encoded_point(&EncodedPoint::from_affine_coordinates( result_x, result_y, false, )) .expect("AffinePoint should be valid"); let mut result = ProjectivePoint::lincomb(&a, &scalar_1, &b, &scalar_2); assert_eq!(result.to_affine(), result_point); // Benchmark let n: u32 = read(); for _ in 0..n { result = ProjectivePoint::lincomb(&result, &scalar_1, &result, &scalar_2); } } ================================================ FILE: openvm-riscv/guest-ecrecover/Cargo.toml ================================================ [workspace] [package] name = "openvm-k256-ecrecover-programs" version = "0.0.0" edition = "2021" [dependencies] openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [ "std", ] } k256 = { git = "https://github.com/powdr-labs/elliptic-curves-k256", rev = "a48ad5c", default-features = false, features = [ "expose-field", "arithmetic", "ecdsa", ] } hex-literal = { version = "0.4.1", default-features = false } ================================================ FILE: openvm-riscv/guest-ecrecover/src/main.rs ================================================ openvm::entry!(main); use hex_literal::hex; use k256::ecdsa::{PowdrVerifyKey, RecoveryId, Signature, VerifyingKey}; use k256::EncodedPoint; use openvm::io::read; // Signature recovery test vectors struct RecoveryTestVector { pk: [u8; 33], sig: [u8; 64], recid: RecoveryId, } const RECOVERY_TEST_VECTORS: &[RecoveryTestVector] = &[ // Recovery ID 0 RecoveryTestVector { pk: hex!("021a7a569e91dbf60581509c7fc946d1003b60c7dee85299538db6353538d59574"), sig: hex!( "ce53abb3721bafc561408ce8ff99c909f7f0b18a2f788649d6470162ab1aa032 3971edc523a6d6453f3fb6128d318d9db1a5ff3386feb1047d9816e780039d52" ), recid: RecoveryId::new(false, false), }, // Recovery ID 1 RecoveryTestVector { 
pk: hex!("036d6caac248af96f6afa7f904f550253a0f3ef3f5aa2fe6838a95b216691468e2"), sig: hex!( "46c05b6368a44b8810d79859441d819b8e7cdc8bfd371e35c53196f4bcacdb51 35c7facce2a97b95eacba8a586d87b7958aaf8368ab29cee481f76e871dbd9cb" ), recid: RecoveryId::new(true, false), }, ]; //Test public key recovery pub fn main() { let n: u32 = read(); for _ in 0..n { for vector in RECOVERY_TEST_VECTORS { let digest = [ 173, 132, 205, 11, 16, 252, 2, 135, 56, 151, 27, 7, 129, 36, 174, 194, 160, 231, 198, 217, 134, 163, 129, 190, 11, 56, 111, 50, 190, 232, 135, 175, ]; let sig = Signature::try_from(vector.sig.as_slice()).unwrap(); let recid = vector.recid; let pk = ::powdr_recover_from_prehash( digest.as_slice(), &sig, recid, ) .unwrap(); assert_eq!(&vector.pk[..], EncodedPoint::from(&pk).as_bytes()); } } } ================================================ FILE: openvm-riscv/guest-ecrecover-manual/Cargo.toml ================================================ [workspace] [package] name = "openvm-k256-test-programs" version = "0.0.0" edition = "2021" [dependencies] openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [ "std", ] } openvm-algebra-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-algebra-moduli-macros = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-ecc-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-ecc-sw-macros = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-k256 = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", package = "k256" } elliptic-curve = { version = "0.13.8" } ecdsa = { version = "0.16.9" } hex-literal = { version = "0.4.1", default-features = false } ================================================ FILE: openvm-riscv/guest-ecrecover-manual/openvm.toml ================================================ 
[app_vm_config.rv32i]
[app_vm_config.rv32m]
[app_vm_config.io]
[app_vm_config.sha256]
[app_vm_config.modular]
supported_moduli = [
    "115792089237316195423570985008687907853269984665640564039457584007908834671663",
    "115792089237316195423570985008687907852837564279074904382605163141518161494337",
]
[[app_vm_config.ecc.supported_curves]]
struct_name = "Secp256k1Point"
modulus = "115792089237316195423570985008687907853269984665640564039457584007908834671663"
scalar = "115792089237316195423570985008687907852837564279074904382605163141518161494337"
a = "0"
b = "7"
================================================
FILE: openvm-riscv/guest-ecrecover-manual/src/main.rs
================================================
extern crate alloc;

use ecdsa::RecoveryId;
use hex_literal::hex;
use openvm_k256::ecdsa::{Signature, VerifyingKey};
// clippy thinks this is unused, but it's used in the init! macro
use openvm::io::read;
#[allow(unused)]
use openvm_k256::Secp256k1Point;

openvm::init!();
openvm::entry!(main);

/// Signature recovery test vectors
struct RecoveryTestVector {
    pk: [u8; 33],
    sig: [u8; 64],
    recid: RecoveryId,
}

const RECOVERY_TEST_VECTORS: &[RecoveryTestVector] = &[
    // Recovery ID 0
    RecoveryTestVector {
        pk: hex!("021a7a569e91dbf60581509c7fc946d1003b60c7dee85299538db6353538d59574"),
        sig: hex!(
            "ce53abb3721bafc561408ce8ff99c909f7f0b18a2f788649d6470162ab1aa032 3971edc523a6d6453f3fb6128d318d9db1a5ff3386feb1047d9816e780039d52"
        ),
        recid: RecoveryId::new(false, false),
    },
    // Recovery ID 1
    RecoveryTestVector {
        pk: hex!("036d6caac248af96f6afa7f904f550253a0f3ef3f5aa2fe6838a95b216691468e2"),
        sig: hex!(
            "46c05b6368a44b8810d79859441d819b8e7cdc8bfd371e35c53196f4bcacdb51 35c7facce2a97b95eacba8a586d87b7958aaf8368ab29cee481f76e871dbd9cb"
        ),
        recid: RecoveryId::new(true, false),
    },
];

// Test public key recovery
fn main() {
    // `n`: number of benchmark iterations, read from guest stdin.
    let n: u32 = read();
    for _ in 0..n {
        for vector in RECOVERY_TEST_VECTORS {
            // Prehashed message digest the test signatures were made over.
            let digest = [
                173, 132, 205, 11, 16, 252, 2, 135, 56, 151, 27, 7, 129, 36, 174, 194, 160, 231,
                198, 217, 134, 163, 129, 190, 11, 56, 111, 50, 190, 232, 135, 175,
            ];
            let sig = Signature::try_from(vector.sig.as_slice()).unwrap();
            let recid = vector.recid;
            let pk = VerifyingKey::recover_from_prehash(digest.as_slice(), &sig, recid).unwrap();
            // Recovered key must match the expected SEC1-compressed encoding.
            assert_eq!(&vector.pk[..], &pk.to_sec1_bytes(true));
        }
    }
}
================================================
FILE: openvm-riscv/guest-hints-test/Cargo.toml
================================================
[workspace]

[package]
name = "powdr-openvm-guest-hints-test"
version = "0.0.0"
edition = "2021"

[dependencies]
# The `rev` here must point to the same version used in the workspace.
# Otherwise, there is conflict with the `powdr-openvm-hints-guest` dependency (which is part of the workspace).
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }
powdr-openvm-hints-guest = { path = "../extensions/hints-guest/" }

[profile.release-with-debug]
inherits = "release"
debug = true
================================================
FILE: openvm-riscv/guest-hints-test/src/main.rs
================================================
#![cfg_attr(target_os = "zkvm", no_main)]
#![cfg_attr(target_os = "zkvm", no_std)]

openvm::entry!(main);

// NOTE(review): the dependency is declared as `powdr-openvm-hints-guest`;
// presumably that package's library target is named
// `powdr_openvm_riscv_hints_guest` — verify against the extension crate.
use powdr_openvm_riscv_hints_guest::hint_reverse_bytes;

// Smoke test for the byte-reversal hint.
pub fn main() {
    let res = hint_reverse_bytes(0x11223344);
    assert_eq!(res, 0x44332211);
}
================================================
FILE: openvm-riscv/guest-keccak/Cargo.toml
================================================
[workspace]

[package]
name = "guest-keccak-stdin"
version = "0.0.0"
edition = "2021"

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }
tiny-keccak = { version = "2.0.2", features = ["keccak"] }

[profile.release-with-debug]
inherits = "release"
debug = true
================================================
FILE: openvm-riscv/guest-keccak/src/main.rs
================================================
#![no_std]
#![no_main]

openvm::entry!(main);

use core::hint::black_box;
use openvm::io::{read, reveal_u32};
use tiny_keccak::{Hasher, Keccak};

// Iterated Keccak-256 benchmark (software implementation via tiny-keccak):
// hash a 32-byte buffer `n` times, revealing one byte of the result.
pub fn main() {
    let n: u32 = read();
    // black_box prevents the compiler from folding the hash chain away.
    let mut output = black_box([0u8; 32]);
    for _ in 0..n {
        let mut hasher = Keccak::v256();
        hasher.update(&output);
        hasher.finalize(&mut output);
    }
    reveal_u32(output[0] as u32, 0);
}
================================================
FILE: openvm-riscv/guest-keccak-manual-precompile/Cargo.toml
================================================
[package]
name = "keccak-example"
version = "0.0.0"
edition = "2021"

[workspace]
members = []

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }
openvm-platform = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }
openvm-keccak256 = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }
================================================
FILE: openvm-riscv/guest-keccak-manual-precompile/openvm.toml
================================================
[app_vm_config.rv32i]
[app_vm_config.rv32m]
[app_vm_config.io]
[app_vm_config.keccak]
================================================
FILE: openvm-riscv/guest-keccak-manual-precompile/src/main.rs
================================================
#![no_std]
#![no_main]

extern crate alloc;

use core::hint::black_box;
use openvm::io::{read, reveal_u32};
use openvm_keccak256::keccak256;

openvm::entry!(main);

// Same iterated-keccak benchmark as guest-keccak, but routed through the
// OpenVM keccak256 precompile (enabled in openvm.toml above).
pub fn main() {
    let n: u32 = read();
    let mut output = [0u8; 32];
    for _ in 0..n {
        output = keccak256(&black_box(output));
    }
    reveal_u32(output[0] as u32, 0);
}
================================================
FILE: openvm-riscv/guest-matmul/Cargo.toml
================================================
[workspace]

[package]
name = "powdr-openvm-matmul-test"
version = "0.0.0"
edition = "2021"

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" }

[profile.release-with-debug]
inherits =
"release"
debug = true
================================================
FILE: openvm-riscv/guest-matmul/src/main.rs
================================================
#![no_std]
#![no_main]

openvm::entry!(main);

use openvm::io::reveal_u32;

pub fn main() {
    loop_test_matrix();
}

const SIZE: usize = 6;
type Mat = [[i32; SIZE]; SIZE];

// 6x6 i32 matrix product c = a * b, deliberately fully unrolled
// (36 dot-product statements) and kept out-of-line via #[inline(never)]
// so the benchmark exercises a fixed straight-line instruction sequence.
#[inline(never)]
fn matrix_multiply_unrolled(a: &Mat, b: &Mat, c: &mut Mat) {
    c[0][0] = a[0][0] * b[0][0] + a[0][1] * b[1][0] + a[0][2] * b[2][0] + a[0][3] * b[3][0] + a[0][4] * b[4][0] + a[0][5] * b[5][0];
    c[0][1] = a[0][0] * b[0][1] + a[0][1] * b[1][1] + a[0][2] * b[2][1] + a[0][3] * b[3][1] + a[0][4] * b[4][1] + a[0][5] * b[5][1];
    c[0][2] = a[0][0] * b[0][2] + a[0][1] * b[1][2] + a[0][2] * b[2][2] + a[0][3] * b[3][2] + a[0][4] * b[4][2] + a[0][5] * b[5][2];
    c[0][3] = a[0][0] * b[0][3] + a[0][1] * b[1][3] + a[0][2] * b[2][3] + a[0][3] * b[3][3] + a[0][4] * b[4][3] + a[0][5] * b[5][3];
    c[0][4] = a[0][0] * b[0][4] + a[0][1] * b[1][4] + a[0][2] * b[2][4] + a[0][3] * b[3][4] + a[0][4] * b[4][4] + a[0][5] * b[5][4];
    c[0][5] = a[0][0] * b[0][5] + a[0][1] * b[1][5] + a[0][2] * b[2][5] + a[0][3] * b[3][5] + a[0][4] * b[4][5] + a[0][5] * b[5][5];
    c[1][0] = a[1][0] * b[0][0] + a[1][1] * b[1][0] + a[1][2] * b[2][0] + a[1][3] * b[3][0] + a[1][4] * b[4][0] + a[1][5] * b[5][0];
    c[1][1] = a[1][0] * b[0][1] + a[1][1] * b[1][1] + a[1][2] * b[2][1] + a[1][3] * b[3][1] + a[1][4] * b[4][1] + a[1][5] * b[5][1];
    c[1][2] = a[1][0] * b[0][2] + a[1][1] * b[1][2] + a[1][2] * b[2][2] + a[1][3] * b[3][2] + a[1][4] * b[4][2] + a[1][5] * b[5][2];
    c[1][3] = a[1][0] * b[0][3] + a[1][1] * b[1][3] + a[1][2] * b[2][3] + a[1][3] * b[3][3] + a[1][4] * b[4][3] + a[1][5] * b[5][3];
    c[1][4] = a[1][0] * b[0][4] + a[1][1] * b[1][4] + a[1][2] * b[2][4] + a[1][3] * b[3][4] + a[1][4] * b[4][4] + a[1][5] * b[5][4];
    c[1][5] = a[1][0] * b[0][5] + a[1][1] * b[1][5] + a[1][2] * b[2][5] + a[1][3] * b[3][5] + a[1][4] * b[4][5] + a[1][5] * b[5][5];
    c[2][0] = a[2][0] * b[0][0] + a[2][1] * b[1][0] + a[2][2] * b[2][0] + a[2][3] * b[3][0] + a[2][4] * b[4][0] + a[2][5] * b[5][0];
    c[2][1] = a[2][0] * b[0][1] + a[2][1] * b[1][1] + a[2][2] * b[2][1] + a[2][3] * b[3][1] + a[2][4] * b[4][1] + a[2][5] * b[5][1];
    c[2][2] = a[2][0] * b[0][2] + a[2][1] * b[1][2] + a[2][2] * b[2][2] + a[2][3] * b[3][2] + a[2][4] * b[4][2] + a[2][5] * b[5][2];
    c[2][3] = a[2][0] * b[0][3] + a[2][1] * b[1][3] + a[2][2] * b[2][3] + a[2][3] * b[3][3] + a[2][4] * b[4][3] + a[2][5] * b[5][3];
    c[2][4] = a[2][0] * b[0][4] + a[2][1] * b[1][4] + a[2][2] * b[2][4] + a[2][3] * b[3][4] + a[2][4] * b[4][4] + a[2][5] * b[5][4];
    c[2][5] = a[2][0] * b[0][5] + a[2][1] * b[1][5] + a[2][2] * b[2][5] + a[2][3] * b[3][5] + a[2][4] * b[4][5] + a[2][5] * b[5][5];
    c[3][0] = a[3][0] * b[0][0] + a[3][1] * b[1][0] + a[3][2] * b[2][0] + a[3][3] * b[3][0] + a[3][4] * b[4][0] + a[3][5] * b[5][0];
    c[3][1] = a[3][0] * b[0][1] + a[3][1] * b[1][1] + a[3][2] * b[2][1] + a[3][3] * b[3][1] + a[3][4] * b[4][1] + a[3][5] * b[5][1];
    c[3][2] = a[3][0] * b[0][2] + a[3][1] * b[1][2] + a[3][2] * b[2][2] + a[3][3] * b[3][2] + a[3][4] * b[4][2] + a[3][5] * b[5][2];
    c[3][3] = a[3][0] * b[0][3] + a[3][1] * b[1][3] + a[3][2] * b[2][3] + a[3][3] * b[3][3] + a[3][4] * b[4][3] + a[3][5] * b[5][3];
    c[3][4] = a[3][0] * b[0][4] + a[3][1] * b[1][4] + a[3][2] * b[2][4] + a[3][3] * b[3][4] + a[3][4] * b[4][4] + a[3][5] * b[5][4];
    c[3][5] = a[3][0] * b[0][5] + a[3][1] * b[1][5] + a[3][2] * b[2][5] + a[3][3] * b[3][5] + a[3][4] * b[4][5] + a[3][5] * b[5][5];
    c[4][0] = a[4][0] * b[0][0] + a[4][1] * b[1][0] + a[4][2] * b[2][0] + a[4][3] * b[3][0] + a[4][4] * b[4][0] + a[4][5] * b[5][0];
    c[4][1] = a[4][0] * b[0][1] + a[4][1] * b[1][1] + a[4][2] * b[2][1] + a[4][3] * b[3][1] + a[4][4] * b[4][1] + a[4][5] * b[5][1];
    c[4][2] = a[4][0] * b[0][2] + a[4][1] * b[1][2] + a[4][2] * b[2][2] + a[4][3] * b[3][2] + a[4][4] * b[4][2] + a[4][5] * b[5][2];
    c[4][3] = a[4][0] * b[0][3] + a[4][1] * b[1][3] + a[4][2] * b[2][3] + a[4][3] * b[3][3] + a[4][4] * b[4][3] + a[4][5] * b[5][3];
    c[4][4] = a[4][0] * b[0][4] + a[4][1] * b[1][4] + a[4][2] * b[2][4] + a[4][3] * b[3][4] + a[4][4] * b[4][4] + a[4][5] * b[5][4];
    c[4][5] = a[4][0] * b[0][5] + a[4][1] * b[1][5] + a[4][2] * b[2][5] + a[4][3] * b[3][5] + a[4][4] * b[4][5] + a[4][5] * b[5][5];
    c[5][0] = a[5][0] * b[0][0] + a[5][1] * b[1][0] + a[5][2] * b[2][0] + a[5][3] * b[3][0] + a[5][4] * b[4][0] + a[5][5] * b[5][0];
    c[5][1] = a[5][0] * b[0][1] + a[5][1] * b[1][1] + a[5][2] * b[2][1] + a[5][3] * b[3][1] + a[5][4] * b[4][1] + a[5][5] * b[5][1];
    c[5][2] = a[5][0] * b[0][2] + a[5][1] * b[1][2] + a[5][2] * b[2][2] + a[5][3] * b[3][2] + a[5][4] * b[4][2] + a[5][5] * b[5][2];
    c[5][3] = a[5][0] * b[0][3] + a[5][1] * b[1][3] + a[5][2] * b[2][3] + a[5][3] * b[3][3] + a[5][4] * b[4][3] + a[5][5] * b[5][3];
    c[5][4] = a[5][0] * b[0][4] + a[5][1] * b[1][4] + a[5][2] * b[2][4] + a[5][3] * b[3][4] + a[5][4] * b[4][4] + a[5][5] * b[5][4];
    c[5][5] = a[5][0] * b[0][5] + a[5][1] * b[1][5] + a[5][2] * b[2][5] + a[5][3] * b[3][5] + a[5][4] * b[4][5] + a[5][5] * b[5][5];
}

// One multiplication of two fixed matrices, with a spot check on c[0][0].
#[inline(never)]
fn test_matrix() {
    let a: Mat = [
        [1, 2, 3, 4, 5, 6],
        [7, 8, 9, 10, 11, 12],
        [13, 14, 15, 16, 17, 18],
        [19, 20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29, 30],
        [31, 32, 33, 34, 35, 36],
    ];
    let b: Mat = [
        [37, 38, 39, 40, 41, 42],
        [43, 44, 45, 46, 47, 48],
        [49, 50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59, 60],
        [61, 62, 63, 64, 65, 66],
        [67, 68, 69, 70, 71, 72],
    ];
    let mut c: Mat = [[0; SIZE]; SIZE];
    matrix_multiply_unrolled(&a, &b, &mut c);
    // 1*37 + 2*43 + 3*49 + 4*55 + 5*61 + 6*67 = 1197
    assert_eq!(c[0][0], 1197);
    reveal_u32(c[0][0] as u32, 0);
    reveal_u32(c[5][5] as u32, 1);
}

// Fixed-iteration benchmark driver (no stdin input for this guest).
#[inline(never)]
fn loop_test_matrix() {
    for _ in 0..8000 {
        test_matrix();
    }
}
================================================
FILE: openvm-riscv/guest-pairing/Cargo.toml
================================================
[package]
name = "guest-pairing"
version = "0.1.0"
edition = "2024"

[workspace]
members = []

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git",
tag = "v1.4.2-powdr-rc.4", features = [
    "std",
] }
ark-bn254 = "0.5"
ark-ec = "0.5"
ark-ff = "0.5"
hex = "0.4"
================================================
FILE: openvm-riscv/guest-pairing/src/main.rs
================================================
use ark_bn254::{Bn254, Fq, Fq2, G1Affine, G2Affine};
use ark_ec::pairing::Pairing;
use ark_ff::fields::PrimeField;
use ark_ff::One;

openvm::entry!(main);

const PAIR_ELEMENT_LEN: usize = 32 * (2 + 4); // G1 (2 Fq), G2 (4 Fq)

/// BN254 multi-pairing over two hard-coded pairs; the product of pairings
/// must equal the identity of the target field.
fn main() {
    // Two (G1, G2) pairs, encoded as in the EVM pairing precompile input:
    // G1.x, G1.y, then G2.x (c1, c0), G2.y (c1, c0), 32 bytes each.
    let input = hex::decode(
        "\
        1c76476f4def4bb94541d57ebba1193381ffa7aa76ada664dd31c16024c43f59\
        3034dd2920f673e204fee2811c678745fc819b55d3e9d294e45c9b03a76aef41\
        209dd15ebff5d46c4bd888e51a93cf99a7329636c63514396b4a452003a35bf7\
        04bf11ca01483bfa8b34b43561848d28905960114c8ac04049af4b6315a41678\
        2bb8324af6cfc93537a2ad1a445cfd0ca2a71acd7ac41fadbf933c2a51be344d\
        120a2a4cf30c1bf9845f20c6fe39e07ea2cce61f0c9bb048165fe5e4de877550\
        111e129f1cf1097710d41c4ac70fcdfa5ba2023c6ff1cbeac322de49d1b6df7c\
        2032c61a830e3c17286de9462bf242fca2883585b93870a73853face6a6bf411\
        198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2\
        1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed\
        090689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b\
        12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa",
    )
    .unwrap();
    let elements = input.len() / PAIR_ELEMENT_LEN;
    let mut g1_vec = Vec::with_capacity(elements);
    let mut g2_vec = Vec::with_capacity(elements);
    for idx in 0..elements {
        // Read the n-th 32-byte big-endian field element of pair `idx`.
        let read_fq_at = |n: usize| {
            debug_assert!(n < PAIR_ELEMENT_LEN / 32);
            let start = idx * PAIR_ELEMENT_LEN + n * 32;
            // SAFETY: `start + 32 <= input.len()` because `idx < elements`
            // and `n < PAIR_ELEMENT_LEN / 32` (debug-asserted above).
            let slice = unsafe { input.get_unchecked(start..start + 32) };
            Fq::from_be_bytes_mod_order(&slice[..32])
        };
        let g1_x = read_fq_at(0);
        let g1_y = read_fq_at(1);
        // G2 coordinates are encoded with c1 before c0.
        let g2_x_c1 = read_fq_at(2);
        let g2_x_c0 = read_fq_at(3);
        let g2_y_c1 = read_fq_at(4);
        let g2_y_c0 = read_fq_at(5);
        let g1 = G1Affine::new_unchecked(g1_x, g1_y);
        let g2_x = Fq2::new(g2_x_c0, g2_x_c1);
        let g2_y = Fq2::new(g2_y_c0, g2_y_c1);
        let g2 = G2Affine::new_unchecked(g2_x, g2_y);
        g1_vec.push(g1);
        g2_vec.push(g2);
    }
    let result = Bn254::multi_pairing(g1_vec, g2_vec);
    // FIX(extraction): the dump stripped the angle-bracketed qualified path,
    // leaving a bare `::TargetField::one()`. `Pairing` is imported solely to
    // name this associated type, so restore it.
    assert_eq!(result.0, <Bn254 as Pairing>::TargetField::one());
}
================================================
FILE: openvm-riscv/guest-pairing-manual-precompile/Cargo.toml
================================================
[package]
name = "openvm-pairing-example"
version = "0.0.0"
edition = "2021"

[workspace]
members = []

[dependencies]
openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [
    "std",
] }
openvm-algebra-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false }
openvm-ecc-guest = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false }
openvm-pairing = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", default-features = false, features = [
    "bn254",
] }
hex = { version = "0.4.3", default-features = false, features = ["alloc"] }
================================================
FILE: openvm-riscv/guest-pairing-manual-precompile/openvm.toml
================================================
[app_vm_config.rv32i]
[app_vm_config.rv32m]
[app_vm_config.io]

[app_vm_config.modular]
supported_moduli = [
    "21888242871839275222246405745257275088696311157297823662689037894645226208583", # coordinate field
    "21888242871839275222246405745257275088548364400416034343698204186575808495617", # scalar field
]

[app_vm_config.fp2]
supported_moduli = [
    ["Bn254Fp2", "21888242871839275222246405745257275088696311157297823662689037894645226208583"],
]

[app_vm_config.pairing]
supported_curves = ["Bn254"]

[[app_vm_config.ecc.supported_curves]]
struct_name = "Bn254G1Affine"
modulus = "21888242871839275222246405745257275088696311157297823662689037894645226208583"
scalar = "21888242871839275222246405745257275088548364400416034343698204186575808495617"
a = "0"
b = "3"
================================================ FILE: openvm-riscv/guest-pairing-manual-precompile/src/main.rs ================================================ use openvm_algebra_guest::IntMod; use openvm_ecc_guest::AffinePoint; use { openvm_pairing::bn254::{Bn254, Fp, Fp2}, openvm_pairing::PairingCheck, }; openvm::init!(); const PAIR_ELEMENT_LEN: usize = 32 * (2 + 4); // 1 G1Affine (2 Fp), 1 G2Affine (4 Fp) // code mostly taken from openvm repo guest benchmarks pub fn main() { let input = hex::decode( "\ 1c76476f4def4bb94541d57ebba1193381ffa7aa76ada664dd31c16024c43f59\ 3034dd2920f673e204fee2811c678745fc819b55d3e9d294e45c9b03a76aef41\ 209dd15ebff5d46c4bd888e51a93cf99a7329636c63514396b4a452003a35bf7\ 04bf11ca01483bfa8b34b43561848d28905960114c8ac04049af4b6315a41678\ 2bb8324af6cfc93537a2ad1a445cfd0ca2a71acd7ac41fadbf933c2a51be344d\ 120a2a4cf30c1bf9845f20c6fe39e07ea2cce61f0c9bb048165fe5e4de877550\ 111e129f1cf1097710d41c4ac70fcdfa5ba2023c6ff1cbeac322de49d1b6df7c\ 2032c61a830e3c17286de9462bf242fca2883585b93870a73853face6a6bf411\ 198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2\ 1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed\ 090689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b\ 12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa", ) .unwrap(); let elements = input.len() / PAIR_ELEMENT_LEN; let mut p = Vec::with_capacity(elements); let mut q = Vec::with_capacity(elements); for idx in 0..elements { let read_fq_at = |n: usize| { debug_assert!(n < PAIR_ELEMENT_LEN / 32); let start = idx * PAIR_ELEMENT_LEN + n * 32; let slice = unsafe { input.get_unchecked(start..start + 32) }; Fp::from_be_bytes(&slice[..32]) }; let g1_x = read_fq_at(0).unwrap(); let g1_y = read_fq_at(1).unwrap(); let g2_x_c1 = read_fq_at(2).unwrap(); let g2_x_c0 = read_fq_at(3).unwrap(); let g2_y_c1 = read_fq_at(4).unwrap(); let g2_y_c0 = read_fq_at(5).unwrap(); let g1 = AffinePoint::new(g1_x, g1_y); let g2_x = Fp2::new(g2_x_c0, g2_x_c1); let g2_y = 
Fp2::new(g2_y_c0, g2_y_c1); let g2 = AffinePoint::new(g2_x, g2_y); p.push(g1); q.push(g2); } let success = Bn254::pairing_check(&p, &q).is_ok(); assert!(success); } ================================================ FILE: openvm-riscv/guest-sha256/Cargo.toml ================================================ [workspace] [package] name = "guest-sha256-stdin" version = "0.0.0" edition = "2021" [dependencies] openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } sha2 = { version = "0.10", default-features = false } digest = { version = "0.10", default-features = false } [profile.release-with-debug] inherits = "release" debug = true ================================================ FILE: openvm-riscv/guest-sha256/src/main.rs ================================================ #![no_std] #![no_main] openvm::entry!(main); use core::hint::black_box; use openvm::io::{read, reveal_u32}; use sha2::{Digest, Sha256}; pub fn main() { let n: u32 = read(); let mut output = black_box([0u8; 32]); for _ in 0..n { output = Sha256::digest(output).into(); } reveal_u32(output[0] as u32, 0); } ================================================ FILE: openvm-riscv/guest-sha256-manual-precompile/Cargo.toml ================================================ [package] name = "sha256-example" version = "0.0.0" edition = "2021" [workspace] members = [] [dependencies] openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-platform = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } openvm-sha2 = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4" } ================================================ FILE: openvm-riscv/guest-sha256-manual-precompile/openvm.toml ================================================ [app_vm_config.rv32i] [app_vm_config.rv32m] [app_vm_config.io] [app_vm_config.sha256] ================================================ FILE: 
openvm-riscv/guest-sha256-manual-precompile/src/main.rs ================================================ #![no_std] #![no_main] extern crate alloc; use core::hint::black_box; use openvm::io::{read, reveal_u32}; use openvm_sha2::sha256; openvm::entry!(main); pub fn main() { let n = read(); let mut output = black_box([0u8; 32]); for _ in 0..n { output = sha256(&output); } reveal_u32(output[0] as u32, 0); } ================================================ FILE: openvm-riscv/guest-u256/Cargo.toml ================================================ [package] name = "u256-example" version = "0.0.0" edition = "2021" [workspace] members = [] [dependencies] openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [ "std", ] } ruint = "1.16" ================================================ FILE: openvm-riscv/guest-u256/openvm.toml ================================================ [app_vm_config.rv32i] [app_vm_config.rv32m] [app_vm_config.io] [app_vm_config.bigint] ================================================ FILE: openvm-riscv/guest-u256/src/main.rs ================================================ #![allow(clippy::needless_range_loop)] use core::array; use ruint::aliases::U256; openvm::entry!(main); const N: usize = 70; type Matrix = [[U256; N]; N]; pub fn get_matrix(val: u32) -> Matrix { array::from_fn(|_| array::from_fn(|_| U256::from(val))) } pub fn mult(a: &Matrix, b: &Matrix) -> Matrix { let mut c = get_matrix(0); for i in 0..N { for j in 0..N { for k in 0..N { c[i][j] += a[i][k] * b[k][j]; } } } c } pub fn get_identity_matrix() -> Matrix { let mut res = get_matrix(0); for i in 0..N { res[i][i] = U256::from(1u32); } res } pub fn main() { let a: Matrix = get_identity_matrix(); let b: Matrix = get_matrix(28); let c: Matrix = mult(&a, &b); if c != b { panic!("Matrix multiplication failed"); } } ================================================ FILE: openvm-riscv/guest-u256-manual-precompile/Cargo.toml 
================================================ [package] name = "u256-example" version = "0.0.0" edition = "2021" [workspace] members = [] [dependencies] openvm = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", features = [ "std", ] } openvm-ruint = { git = "https://github.com/powdr-labs/openvm.git", tag = "v1.4.2-powdr-rc.4", package = "ruint" } ================================================ FILE: openvm-riscv/guest-u256-manual-precompile/openvm.toml ================================================ [app_vm_config.rv32i] [app_vm_config.rv32m] [app_vm_config.io] [app_vm_config.bigint] ================================================ FILE: openvm-riscv/guest-u256-manual-precompile/src/main.rs ================================================ #![allow(clippy::needless_range_loop)] use core::array; use openvm_ruint::aliases::U256; openvm::entry!(main); const N: usize = 70; type Matrix = [[U256; N]; N]; pub fn get_matrix(val: u32) -> Matrix { array::from_fn(|_| array::from_fn(|_| U256::from(val))) } pub fn mult(a: &Matrix, b: &Matrix) -> Matrix { let mut c = get_matrix(0); for i in 0..N { for j in 0..N { for k in 0..N { c[i][j] += a[i][k] * b[k][j]; } } } c } pub fn get_identity_matrix() -> Matrix { let mut res = get_matrix(0); for i in 0..N { res[i][i] = U256::from(1u32); } res } pub fn main() { let a: Matrix = get_identity_matrix(); let b: Matrix = get_matrix(28); let c: Matrix = mult(&a, &b); if c != b { panic!("Matrix multiplication failed"); } } ================================================ FILE: openvm-riscv/scripts/basic_metrics.py ================================================ #!/usr/bin/env python3 import argparse import json import os from collections import OrderedDict import pandas as pd import matplotlib.pyplot as plt from matplotlib.ticker import AutoMinorLocator from metrics_utils import load_metrics_dataframes, is_normal_instruction_air def get_label(filepath): """Extract a label from a metrics file path. 
Use parent directory name if file is metrics.json, otherwise use filename without extension. """ basename = os.path.basename(filepath) if basename == "metrics.json": return os.path.basename(os.path.dirname(filepath)) else: return os.path.splitext(basename)[0] def extract_metrics(filename): app, leaf, internal = load_metrics_dataframes(filename) metrics = OrderedDict() powdr_air = app[app["air_name"].fillna('').str.startswith("PowdrAir")] non_powdr_air = app[~app["air_name"].fillna('').str.startswith("PowdrAir")] # Split non_powdr_air into normal instructions and openvm precompiles is_normal_instruction = non_powdr_air["air_name"].fillna('').apply(is_normal_instruction_air) normal_instruction_air = non_powdr_air[is_normal_instruction] openvm_precompile_air = non_powdr_air[~is_normal_instruction] def get_metric(df, metric_name): return pd.to_numeric(df[df["metric"] == metric_name]["value"]).sum() # Compute total proof times app_proof_time_ms = get_metric(app, "total_proof_time_ms") leaf_proof_time_ms = get_metric(leaf, "total_proof_time_ms") internal_proof_time_ms = get_metric(internal, "total_proof_time_ms") total_proof_time_ms = app_proof_time_ms + leaf_proof_time_ms + internal_proof_time_ms app_proof_time_excluding_trace_ms = get_metric(app, "stark_prove_excluding_trace_time_ms") leaf_proof_time_excluding_trace_ms = get_metric(leaf, "stark_prove_excluding_trace_time_ms") internal_proof_time_excluding_trace_ms = get_metric(internal, "stark_prove_excluding_trace_time_ms") total_proof_time_excluding_trace_ms = app_proof_time_excluding_trace_ms + leaf_proof_time_excluding_trace_ms + internal_proof_time_excluding_trace_ms # Compute total column counts # Note that this sums the columns over *all* segments. # This metric should roughly correlate with leaf proof time. 
    main_cols = get_metric(app, "main_cols")
    prep_cols = get_metric(app, "prep_cols")
    perm_cols = get_metric(app, "perm_cols")
    app_proof_cols = main_cols + prep_cols + perm_cols
    # Segments are 0-indexed, so the segment count is max(segment) + 1.
    num_segments = int(pd.to_numeric(app["segment"]).max()) + 1
    metrics["filename"] = filename
    metrics["num_segments"] = num_segments
    metrics["app_proof_cells"] = get_metric(app, "total_cells")
    metrics["app_proof_cols"] = app_proof_cols
    metrics["total_proof_time_ms"] = total_proof_time_ms
    metrics["total_proof_time_excluding_trace_ms"] = total_proof_time_excluding_trace_ms
    metrics["app_proof_time_ms"] = app_proof_time_ms
    metrics["app_proof_time_excluding_trace_ms"] = app_proof_time_excluding_trace_ms
    metrics["app_execute_preflight_time_ms"] = get_metric(app, "execute_preflight_time_ms")
    metrics["app_execute_metered_time_ms"] = get_metric(app, "execute_metered_time_ms")
    metrics["app_trace_gen_time_ms"] = get_metric(app, "trace_gen_time_ms")
    metrics["leaf_proof_time_ms"] = leaf_proof_time_ms
    metrics["leaf_proof_time_excluding_trace_ms"] = leaf_proof_time_excluding_trace_ms
    metrics["inner_recursion_proof_time_ms"] = internal_proof_time_ms
    metrics["inner_recursion_proof_time_excluding_trace_ms"] = internal_proof_time_excluding_trace_ms
    normal_instruction_cells = get_metric(normal_instruction_air, "cells")
    openvm_precompile_cells = get_metric(openvm_precompile_air, "cells")
    powdr_cells = get_metric(powdr_air, "cells")
    # Sanity check: the three AIR categories must partition all app cells.
    assert(metrics["app_proof_cells"] == powdr_cells + normal_instruction_cells + openvm_precompile_cells)
    metrics["normal_instruction_ratio"] = normal_instruction_cells / metrics["app_proof_cells"]
    metrics["openvm_precompile_ratio"] = openvm_precompile_cells / metrics["app_proof_cells"]
    metrics["powdr_ratio"] = powdr_cells / metrics["app_proof_cells"]
    metrics["powdr_rows"] = get_metric(powdr_air, "rows")
    return metrics


def summary_table(metrics_files, csv):
    # Print one row of extracted metrics per input file, as CSV or plain text.
    file_metrics = [
        extract_metrics(filename)
        for filename in metrics_files
    ]
    df = pd.DataFrame(file_metrics)
    if csv:
        print(df.to_csv(index=False))
    else:
        print(df.to_string(index=False))


def plot(metrics_files, output):
    # Proof-time breakdown for all given metrics files: stacked bars (left)
    # and per-component grouped bars (right).
    file_metrics = [
        extract_metrics(filename)
        for filename in metrics_files
    ]
    df = pd.DataFrame(file_metrics)
    # Compute app "other" time
    df["app_other_ms"] = (
        df["app_proof_time_ms"]
        - df["app_proof_time_excluding_trace_ms"]
        - df["app_execute_preflight_time_ms"]
        - df["app_execute_metered_time_ms"]
        - df["app_trace_gen_time_ms"]
    )
    # Stack components (bottom to top) with colors
    # App components use shades of blue, others use distinct colors
    components = [
        ("inner_recursion_proof_time_ms", "Inner recursion", "#9b3e00"),
        ("leaf_proof_time_ms", "Leaf recursion", "#d69600"),
        ("app_proof_time_excluding_trace_ms", "App STARK (excl. trace)", "#1f77b4"),
        ("app_trace_gen_time_ms", "App trace gen", "#6baed6"),
        ("app_execute_preflight_time_ms", "App preflight", "#9ecae1"),
        ("app_execute_metered_time_ms", "App metered", "#c6dbef"),
        ("app_other_ms", "App other", "#08519c"),
    ]
    x_labels = [get_label(f) for f in df["filename"]]
    import numpy as np
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 7))
    plt.subplots_adjust(bottom=0.18)  # Make room for legend at bottom
    # === Left plot: Stacked bars ===
    bottom = [0.0] * len(df)
    bars_data = []  # Store bar info for labeling
    for col, label, color in components:
        values = [v / 1000 for v in df[col].tolist()]  # Convert ms to seconds
        bars = ax1.bar(x_labels, values, bottom=bottom, label=label, color=color)
        bars_data.append((bars, values, bottom.copy(), color))
        # Running stack height; after the loop this holds the per-bar totals.
        bottom = [b + v for b, v in zip(bottom, values)]
    # Get the total height for threshold calculation
    max_height = max(bottom)
    min_label_height = max_height * 0.02
    # Add value labels to each segment
    for bars, values, bottoms, color in bars_data:
        for bar, value, bot, top in zip(bars, values, bottoms, bottom):
            # Skip segments too small to carry a readable label.
            if value < min_label_height:
                continue
            center_y = bot + value / 2
            center_x = bar.get_x() + bar.get_width() / 2
            text_color = 'black'
            # `top` is the final stack height of this bar, so this is % of total.
            percentage = value / top * 100 if top > 0 else 0
ax1.text(center_x, center_y, f'{value:.1f} ({percentage:.1f}%)', ha='center', va='center', fontsize=8, color=text_color, fontweight='bold') # Add total labels on top of each stack last_bars = bars_data[-1][0] for bar, total in zip(last_bars, bottom): center_x = bar.get_x() + bar.get_width() / 2 ax1.text(center_x, total + max_height * 0.01, f'Total: {total:.1f}', ha='center', va='bottom', fontsize=9, color='black', fontweight='bold') ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x:.2f}')) ax1.yaxis.set_minor_locator(AutoMinorLocator(2)) ax1.grid(axis='y', which='major', linestyle='-', alpha=0.4) ax1.grid(axis='y', which='minor', linestyle='--', alpha=0.2) ax1.set_axisbelow(True) ax1.set_ylabel("Time (s)") ax1.set_title("Stacked") # === Right plot: Grouped bars === n_configs = len(x_labels) n_components = len(components) bar_width = 0.8 / n_components x_pos = np.arange(n_configs) for i, (col, label, color) in enumerate(components): values = [v / 1000 for v in df[col].tolist()] offset = (i - n_components / 2 + 0.5) * bar_width ax2.bar(x_pos + offset, values, bar_width, label=label, color=color) ax2.set_xticks(x_pos) ax2.set_xticklabels(x_labels) ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x:.2f}')) ax2.yaxis.set_minor_locator(AutoMinorLocator(2)) ax2.grid(axis='y', which='major', linestyle='-', alpha=0.4) ax2.grid(axis='y', which='minor', linestyle='--', alpha=0.2) ax2.set_axisbelow(True) ax2.set_ylabel("Time (s)") ax2.set_title("By Component") # Shared legend below both plots handles, legend_labels = ax1.get_legend_handles_labels() fig.legend(handles, legend_labels, loc="upper center", bbox_to_anchor=(0.5, 0.02), ncol=len(components), frameon=False, fontsize=9) plt.tight_layout() if output: plt.savefig(output, bbox_inches='tight') print(f"Plot saved to {output}") else: plt.show() def combine(metrics_files): combined = OrderedDict() for filepath in metrics_files: label = get_label(filepath) with open(filepath) as f: 
            combined[label] = json.load(f)
    print(json.dumps(combined))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Print basic metrics from a set of metrics JSON files.")
    subparsers = parser.add_subparsers(dest="command", required=True)
    summary_parser = subparsers.add_parser("summary-table", help="Print a summary table of metrics")
    summary_parser.add_argument('metrics_files', nargs='+', help='Paths to the metrics JSON files')
    summary_parser.add_argument('--csv', action='store_true', help='Output in CSV format')
    plot_parser = subparsers.add_parser("plot", help="Plot a stacked bar chart of proof time breakdown")
    plot_parser.add_argument('metrics_files', nargs='+', help='Paths to the metrics JSON files')
    plot_parser.add_argument('--output', '-o', help='Output file path (if not specified, displays interactively)')
    combine_parser = subparsers.add_parser("combine", help="Combine metrics JSON files into a single JSON")
    combine_parser.add_argument('metrics_files', nargs='+', help='Paths to the metrics JSON files')
    args = parser.parse_args()
    # Dispatch on the chosen sub-command.
    if args.command == "summary-table":
        summary_table(args.metrics_files, args.csv)
    elif args.command == "plot":
        plot(args.metrics_files, args.output)
    elif args.command == "combine":
        combine(args.metrics_files)

================================================
FILE: openvm-riscv/scripts/generate_bench_results_readme.py
================================================
from __future__ import annotations
from argparse import ArgumentParser
from pathlib import Path
from urllib.parse import quote

# Base URLs of the published bench-results branch and the hosted viewer apps.
BENCH_RESULTS_BLOB_BASE = "https://github.com/powdr-labs/bench-results/blob/gh-pages"
BENCH_RESULTS_TREE_BASE = "https://github.com/powdr-labs/bench-results/tree/gh-pages"
APC_ANALYZER_BASE = "https://powdr-labs.github.io/powdr/autoprecompile-analyzer/"
METRICS_VIEWER_BASE = "https://powdr-labs.github.io/powdr/openvm/metrics-viewer/"


def github_blob_url(relative_path: Path, run_id: str) -> str:
    # Blob URL for a single result file within the given run.
    path = Path("results") / run_id /
        relative_path
    return f"{BENCH_RESULTS_BLOB_BASE}/{path.as_posix()}"


def github_tree_url(run_id: str, subdir: str | None = None) -> str:
    # Tree (directory-listing) URL for a run, optionally for a sub-directory.
    path = Path("results") / run_id
    if subdir:
        path = path / subdir
    return f"{BENCH_RESULTS_TREE_BASE}/{path.as_posix()}"


def viewer_url(viewer_base: str, data_url: str) -> str:
    # Viewer link that loads `data_url` via its `data` query parameter.
    return f"{viewer_base}?data={quote(data_url, safe='')}"


def find_apc_candidates(experiment_dir: Path) -> Path | None:
    # All apc_candidates.json files within an experiment should be identical
    # (with pgo=cell, all APCs are computed regardless of how many are selected),
    # so we just pick any one deterministically.
    candidates = sorted(experiment_dir.glob("**/apc_candidates.json"))
    if not candidates:
        return None
    return min(
        candidates,
        key=lambda path: (len(path.relative_to(experiment_dir).parts), path.as_posix()),
    )


def generate_readme(results_dir: Path, run_id: str) -> str:
    """Render the README markdown for one published bench-results run."""
    experiments: list[dict[str, str]] = []
    for experiment_dir in sorted(path for path in results_dir.iterdir() if path.is_dir()):
        name = experiment_dir.name
        metrics_path = experiment_dir / "combined_metrics.json"
        apc_path = find_apc_candidates(experiment_dir)
        entry: dict[str, str] = {"name": name}
        # Only link the viewers when the backing data file actually exists.
        if metrics_path.exists():
            metrics_data_url = github_blob_url(metrics_path.relative_to(results_dir), run_id)
            entry["metrics_url"] = viewer_url(METRICS_VIEWER_BASE, metrics_data_url)
        if apc_path is not None:
            apc_data_url = github_blob_url(apc_path.relative_to(results_dir), run_id)
            entry["apc_url"] = viewer_url(APC_ANALYZER_BASE, apc_data_url)
        entry["tree_url"] = github_tree_url(run_id, name)
        experiments.append(entry)
    # Put reth first if present, then the rest alphabetically.
    experiments.sort(key=lambda e: (0 if e["name"] == "reth" else 1, e["name"]))
    lines = [
        f"# Bench results — {run_id}",
        "",
    ]
    for exp in experiments:
        name = exp["name"]
        links = [f"📂 [Raw data]({exp['tree_url']})"]
        if "metrics_url" in exp:
            links.append(f"📊 [Metrics Viewer]({exp['metrics_url']})")
        if "apc_url" in exp:
            links.append(f"🔍 [APC Analyzer]({exp['apc_url']})")
        lines.append(f"**{name}**: " + "  |  ".join(links))
        lines.append("")
    return "\n".join(lines)


def main() -> None:
    # CLI entry point: render the README, then print it or write it to --output.
    parser = ArgumentParser(description="Generate a README for a published bench-results run.")
    parser.add_argument("results_dir", type=Path)
    parser.add_argument("run_id")
    parser.add_argument("--output", type=Path, default=None)
    args = parser.parse_args()
    readme = generate_readme(args.results_dir, args.run_id)
    if args.output is None:
        print(readme, end="")
    else:
        args.output.write_text(readme)


if __name__ == "__main__":
    main()

================================================
FILE: openvm-riscv/scripts/metrics_utils.py
================================================
#!/usr/bin/env python3
import sys
import json
import re
import pandas as pd


def load_metrics_dataframes(filename):
    """Load metrics JSON file and return app, leaf, and internal dataframes.

    Each dataframe has a "metric" and "value" column, along with optional
    columns like "air_name", or "segment".
""" with open(filename) as f: metrics_json = json.load(f) entries = [ dict(c["labels"]) | { "metric": c["metric"], "value": c["value"] } for c in metrics_json["counter"] + metrics_json["gauge"] ] df = pd.DataFrame(entries) # "group" has different values if coming from reth benchmark or the powdr cli app = df[df["group"].fillna('').str.startswith("app_proof")] if len(app) == 0: app = df[df["group"].fillna('').str.startswith("reth")] if len(app) == 0: print("Invalid metrics.json", file=sys.stderr) exit(1) leaf = df[df["group"].fillna('').str.startswith("leaf")] internal = df[df["group"].fillna('').str.startswith("internal")] return app, leaf, internal def is_normal_instruction_air(air_name): """Check if an AIR name represents a normal RISC-V instruction. Rules: - Must be a VmAirWrapper - If the core chip is FieldExpressionCoreAir, return False - If the core chip has numeric parameters and first one (number of limbs) is not 4, return False - Otherwise return True """ # Match VmAirWrapper pattern match = re.match(r'^VmAirWrapper<[^,]+,\s*([^>]+?)(?:<(\d+)(?:,\s*\d+)*>)?\s*>$', air_name) if not match: return False core_name = match.group(1) num_limbs = match.group(2) if "FieldExpressionCoreAir" == core_name: return False if num_limbs and int(num_limbs) != 4: return False return True def test_is_normal_instruction_air(): # Test cases from the reth benchmark assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper>") assert is_normal_instruction_air("VmAirWrapper>") assert 
is_normal_instruction_air("VmAirWrapper>") assert not is_normal_instruction_air("VmAirWrapper, FieldExpressionCoreAir>") assert not is_normal_instruction_air("VmAirWrapper, FieldExpressionCoreAir>") assert not is_normal_instruction_air("VmAirWrapper, FieldExpressionCoreAir>") assert not is_normal_instruction_air("VmAirWrapper, FieldExpressionCoreAir>") assert not is_normal_instruction_air("VmAirWrapper, FieldExpressionCoreAir>") assert not is_normal_instruction_air("VmAirWrapper, FieldExpressionCoreAir>") assert not is_normal_instruction_air("VmAirWrapper, ModularIsEqualCoreAir<32, 4, 8>>") assert not is_normal_instruction_air("VmAirWrapper, BaseAluCoreAir<32, 8>>") assert not is_normal_instruction_air("VmAirWrapper, BranchEqualCoreAir<32>>") assert not is_normal_instruction_air("VmAirWrapper, ShiftCoreAir<32, 8>>") assert not is_normal_instruction_air("VmAirWrapper, MultiplicationCoreAir<32, 8>>") assert not is_normal_instruction_air("VmAirWrapper, LessThanCoreAir<32, 8>>") assert not is_normal_instruction_air("VmAirWrapper, ModularIsEqualCoreAir<48, 4, 8>>") assert not is_normal_instruction_air("KeccakVmAir") assert not is_normal_instruction_air("PowdrAir") assert not is_normal_instruction_air("Poseidon2PeripheryAir, 1>") assert not is_normal_instruction_air("MemoryMerkleAir<8>") assert not is_normal_instruction_air("AccessAdapterAir<8>") assert not is_normal_instruction_air("PersistentBoundaryAir<8>") assert not is_normal_instruction_air("Rv32HintStoreAir") assert not is_normal_instruction_air("AccessAdapterAir<16>") assert not is_normal_instruction_air("RangeTupleCheckerAir<2>") assert not is_normal_instruction_air("ProgramAir") assert not is_normal_instruction_air("AccessAdapterAir<32>") assert not is_normal_instruction_air("AccessAdapterAir<2>") assert not is_normal_instruction_air("AccessAdapterAir<4>") assert not is_normal_instruction_air("VariableRangeCheckerAir") assert not is_normal_instruction_air("BitwiseOperationLookupAir<8>") assert not 
is_normal_instruction_air("PhantomAir") assert not is_normal_instruction_air("VmConnectorAir") ================================================ FILE: openvm-riscv/scripts/plot_trace_cells.py ================================================ #!/usr/bin/env python3 import pandas as pd import matplotlib.pyplot as plt import argparse from metrics_utils import load_metrics_dataframes def autopct_with_billions(pct, total): val = pct * total / 100 return f'{pct:.1f}%\n{val/1e9:.2f}B' def compute_cells_by_air(metrics_path): # Load only the app dataframe app, _, _ = load_metrics_dataframes(metrics_path) # Get total cells from app dataframe total_cells_df = app[app["metric"] == "total_cells"] total_cells = pd.to_numeric(total_cells_df["value"]).sum() print(f"Total cells: {total_cells/1e9:.2f}B") # Get cell entries from app dataframe cells_df = app[app["metric"] == "cells"].copy() cells_df["segment"] = pd.to_numeric(cells_df["segment"].fillna(0)) cells_df["cells"] = pd.to_numeric(cells_df["value"]) # Create dataframe with required columns df = cells_df[["segment", "air_name", "cells"]] # Group and threshold cells_by_air = df.groupby('air_name')['cells'].sum().sort_values(ascending=False) # Sanity check: #cells should match total_cells assert total_cells == cells_by_air.sum() return cells_by_air def main(metrics_path, output_path=None, subtitle=None): cells_by_air = compute_cells_by_air(metrics_path) print("Cells by AIR:") print(cells_by_air) threshold_ratio = 0.015 threshold = threshold_ratio * cells_by_air.sum() large = cells_by_air[cells_by_air >= threshold] small = cells_by_air[cells_by_air < threshold] if not small.empty: large['Other'] = small.sum() _, ax = plt.subplots(figsize=(7.5, 7.5)) plot_title = "Trace cells by AIR" if subtitle is None else f"Trace cells by AIR ({subtitle})" ax.set_title(plot_title) total = large.sum() colors = plt.get_cmap("tab20")(range(len(large))) def autopct_filtered(pct): return autopct_with_billions(pct, total) if pct > 5 else '' wedges, _, 
    _ = ax.pie(
        large,
        autopct=autopct_filtered,
        startangle=90,
        colors=colors
    )
    # Legend entries carry the exact percentage, since small slices are unlabeled.
    percentages = 100 * large / total
    legend_labels = [f"{percent:.1f}% - {label}" for label, percent in zip(large.index, percentages)]
    ax.legend(
        wedges,
        legend_labels,
        title="AIRs",
        loc="upper center",
        bbox_to_anchor=(0.5, 0),
        ncol=1,
        fontsize='small',
        title_fontsize='medium',
        frameon=False
    )
    plt.ylabel('')
    plt.tight_layout(pad=5.0)
    if output_path:
        print(f"Saving plot to {output_path}")
        plt.savefig(output_path, bbox_inches="tight")
    else:
        plt.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Visualize AIR cell metrics from a JSON file.")
    parser.add_argument("metrics_path", help="Path to the metrics.json file")
    parser.add_argument("-o", "--output", help="Optional path to save the output image")
    parser.add_argument("-s", "--subtitle", help="Optional subtitle for the plot")
    args = parser.parse_args()
    main(args.metrics_path, args.output, args.subtitle)

================================================
FILE: openvm-riscv/scripts/readme.md
================================================
### Scripts

Set up (from the project root):

```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -r openvm-riscv/scripts/requirements.txt
```

================================================
FILE: openvm-riscv/scripts/requirements.txt
================================================
pandas
matplotlib
psrecord
pytest

================================================
FILE: openvm-riscv/scripts/run_guest_benches.sh
================================================
#!/bin/bash
# Script to collect some numbers from our OpenVM guest examples.
# Mostly for CI usage, but can be easily modified for manual tests.
# NOTE: The script expects the python environment to be set up with the required
# dependencies. Should be run from the project root, will create a `results`
# directory.
set -e

SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]}")
SCRIPTS_DIR=$(dirname "$SCRIPT_PATH")

# Run one benchmark configuration.
#   $1: guest crate name
#   $2: guest input
#   $3: number of autoprecompiles
#   $4: run name (also the output directory, created if missing)
run_bench() {
    # `local` keeps these from leaking into the caller's scope across runs.
    local guest="$1"
    local input="$2"
    local apcs="$3"
    local run_name="$4"

    echo ""
    echo "==== ${run_name} ===="
    echo ""

    mkdir -p "${run_name}"
    # psrecord samples CPU/memory of the prover (and children) while it runs.
    psrecord --include-children --interval 1 \
        --log "${run_name}"/psrecord.csv \
        --log-format csv \
        --plot "${run_name}"/psrecord.png \
        "cargo run --bin powdr_openvm_riscv -r --features metrics prove \"$guest\" --input \"$input\" --autoprecompiles \"$apcs\" --metrics \"${run_name}/metrics.json\" --recursion --apc-candidates-dir \"${run_name}\""

    python3 "$SCRIPTS_DIR"/plot_trace_cells.py -o "${run_name}"/trace_cells.png "${run_name}"/metrics.json > "${run_name}"/trace_cells.txt

    # apc_candidates.json is only available when apcs > 0
    if [ "${apcs:-0}" -ne 0 ]; then
        python3 "$SCRIPTS_DIR"/../../autoprecompiles/scripts/plot_effectiveness.py "${run_name}"/apc_candidates.json --output "${run_name}"/effectiveness.png
    fi

    # Clean up some files that we don't want to push.
    rm -f "${run_name}"/apc_candidate_*
}

# TODO: Some benchmarks are currently disabled to keep the nightly run below 6h.
### Keccak dir="results/keccak" input="10000" mkdir -p "$dir" pushd "$dir" run_bench guest-keccak-manual-precompile "$input" 0 manual run_bench guest-keccak "$input" 0 apc000 run_bench guest-keccak "$input" 3 apc003 # run_bench guest-keccak "$input" 10 apc010 # Save ~3mins run_bench guest-keccak "$input" 30 apc030 python3 $SCRIPTS_DIR/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 $SCRIPTS_DIR/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 $SCRIPTS_DIR/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ### SHA256 dir="results/sha256" input="30000" mkdir -p "$dir" pushd "$dir" run_bench guest-sha256-manual-precompile "$input" 0 manual run_bench guest-sha256 "$input" 0 apc000 run_bench guest-sha256 "$input" 3 apc003 # run_bench guest-sha256 "$input" 10 apc010 # Save ~5mins run_bench guest-sha256 "$input" 30 apc030 python3 $SCRIPTS_DIR/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 $SCRIPTS_DIR/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 $SCRIPTS_DIR/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ### Pairing dir="results/pairing" input="0" # No input mkdir -p "$dir" pushd "$dir" run_bench guest-pairing-manual-precompile "$input" 0 manual run_bench guest-pairing "$input" 0 apc000 run_bench guest-pairing "$input" 3 apc003 # run_bench guest-pairing "$input" 10 apc010 # Save ~4mins run_bench guest-pairing "$input" 30 apc030 # run_bench guest-pairing "$input" 100 apc100 # Save ~7mins python3 $SCRIPTS_DIR/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 $SCRIPTS_DIR/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 $SCRIPTS_DIR/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ### U256 dir="results/u256" input="0" # No input mkdir -p "$dir" pushd "$dir" run_bench guest-u256-manual-precompile "$input" 0 manual run_bench guest-u256 
"$input" 0 apc000 run_bench guest-u256 "$input" 3 apc003 # run_bench guest-u256 "$input" 10 apc010 # Save ~4mins run_bench guest-u256 "$input" 30 apc030 python3 $SCRIPTS_DIR/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 $SCRIPTS_DIR/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 $SCRIPTS_DIR/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ### Matmul dir="results/matmul" mkdir -p "$dir" pushd "$dir" run_bench guest-matmul 0 0 apc000 run_bench guest-matmul 0 3 apc003 run_bench guest-matmul 0 10 apc010 run_bench guest-matmul 0 30 apc030 python3 "$SCRIPTS_DIR"/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 "$SCRIPTS_DIR"/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 "$SCRIPTS_DIR"/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ### ECC dir="results/ecc" input="50" mkdir -p "$dir" pushd "$dir" run_bench guest-ecc-manual $input 0 manual run_bench guest-ecc-projective $input 0 projective-apc000 run_bench guest-ecc-projective $input 3 projective-apc003 # run_bench guest-ecc-projective $input 10 projective-apc010 # Save ~12mins run_bench guest-ecc-projective $input 30 projective-apc030 # run_bench guest-ecc-projective $input 100 projective-apc100 # Save ~12mins run_bench guest-ecc-powdr-affine-hint $input 0 affine-hint-apc000 run_bench guest-ecc-powdr-affine-hint $input 3 affine-hint-apc003 # run_bench guest-ecc-powdr-affine-hint $input 10 affine-hint-apc010 # Save ~7mins run_bench guest-ecc-powdr-affine-hint $input 30 affine-hint-apc030 # run_bench guest-ecc-powdr-affine-hint $input 100 affine-hint-apc100 # Save ~7mins python3 $SCRIPTS_DIR/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 $SCRIPTS_DIR/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 $SCRIPTS_DIR/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ### ECRECOVER 
dir="results/ecrecover" input="20" mkdir -p "$dir" pushd "$dir" run_bench guest-ecrecover-manual $input 0 manual run_bench guest-ecrecover $input 0 apc000 run_bench guest-ecrecover $input 3 apc003 # run_bench guest-ecrecover $input 10 apc010 # Save ~6mins run_bench guest-ecrecover $input 30 apc030 # run_bench guest-ecrecover $input 100 apc100 # Save ~6mins python3 $SCRIPTS_DIR/basic_metrics.py summary-table --csv **/metrics.json > basic_metrics.csv python3 $SCRIPTS_DIR/basic_metrics.py plot **/metrics.json -o proof_time_breakdown.png python3 $SCRIPTS_DIR/basic_metrics.py combine **/metrics.json > combined_metrics.json popd ================================================ FILE: openvm-riscv/src/isa/instruction_formatter.rs ================================================ use super::opcode::*; use openvm_instructions::{instruction::Instruction, VmOpcode}; use openvm_stark_backend::p3_field::PrimeField32; use powdr_openvm::format_fe; pub fn openvm_instruction_formatter(instruction: &Instruction) -> String { let Instruction { opcode, a, b, c, d, e, f, g, } = instruction; let opcode_number = opcode.as_usize(); let opcode_name = openvm_opcode_formatter(opcode); match opcode_number { // Alu instructions, see: // https://github.com/openvm-org/openvm/blob/v1.0.0/extensions/rv32im/circuit/src/adapters/alu.rs#L197-L201 512..=521 => { assert_eq!(d, &F::ONE); assert_eq!(f, &F::ZERO); assert_eq!(g, &F::ZERO); format!("{opcode_name} rd_ptr = {a}, rs1_ptr = {b}, rs2 = {c}, rs2_as = {e}") } // Load/Store instructions, see: // https://github.com/openvm-org/openvm/blob/v1.0.0/extensions/rv32im/circuit/src/adapters/loadstore.rs#L340-L346 528..=535 => { assert_eq!(d, &F::ONE); format!("{opcode_name} rd_rs2_ptr = {a}, rs1_ptr = {b}, imm = {c}, mem_as = {e}, needs_write = {f}, imm_sign = {g}") } OPCODE_BLT | OPCODE_BLTU | OPCODE_BGE | OPCODE_BGEU | OPCODE_BEQ | OPCODE_BNE => { let c = format_fe(*c); format!("{opcode_name} {a} {b} {c} {d} {e}") } // All other opcodes in the list x if 
ALL_OPCODES.contains(&x) => format!("{opcode_name} {a} {b} {c} {d} {e}"), // Opcodes not in the list _ => format!("{opcode_name} {a} {b} {c} {d} {e} {f} {g}"), } } pub fn openvm_opcode_formatter(opcode: &VmOpcode) -> String { // Opcodes taken from: // https://github.com/openvm-org/openvm/blob/v1.0.0/extensions/rv32im/transpiler/src/instructions.rs match opcode.as_usize() { // Rv32BaseAluChip opcodes OPCODE_ADD => "ADD".to_string(), OPCODE_SUB => "SUB".to_string(), OPCODE_XOR => "XOR".to_string(), OPCODE_OR => "OR".to_string(), OPCODE_AND => "AND".to_string(), // Rv32ShiftChip opcodes OPCODE_SLL => "SLL".to_string(), OPCODE_SRL => "SRL".to_string(), OPCODE_SRA => "SRA".to_string(), // Rv32LessThanChip opcodes OPCODE_SLT => "SLT".to_string(), OPCODE_SLTU => "SLTU".to_string(), // Load/Store opcodes OPCODE_LOADW => "LOADW".to_string(), OPCODE_LOADBU => "LOADBU".to_string(), OPCODE_LOADHU => "LOADHU".to_string(), OPCODE_STOREW => "STOREW".to_string(), OPCODE_STOREH => "STOREH".to_string(), OPCODE_STOREB => "STOREB".to_string(), OPCODE_LOADB => "LOADB".to_string(), OPCODE_LOADH => "LOADH".to_string(), // Other opcodes OPCODE_BEQ => "BEQ".to_string(), OPCODE_BNE => "BNE".to_string(), OPCODE_BLT => "BLT".to_string(), OPCODE_BLTU => "BLTU".to_string(), OPCODE_BGE => "BGE".to_string(), OPCODE_BGEU => "BGEU".to_string(), OPCODE_JAL => "JAL".to_string(), OPCODE_LUI => "LUI".to_string(), OPCODE_JALR => "JALR".to_string(), OPCODE_AUIPC => "AUIPC".to_string(), OPCODE_MUL => "MUL".to_string(), OPCODE_MULH => "MULH".to_string(), OPCODE_MULHSU => "MULHSU".to_string(), OPCODE_MULHU => "MULHU".to_string(), OPCODE_DIV => "DIV".to_string(), OPCODE_DIVU => "DIVU".to_string(), OPCODE_REM => "REM".to_string(), OPCODE_REMU => "REMU".to_string(), OPCODE_HINT_STOREW => "HINT_STOREW".to_string(), OPCODE_HINT_BUFFER => "HINT_BUFFER".to_string(), // Bigint opcodes BIGINT_OPCODE_BEQ => "BIGINT_BEQ".to_string(), BIGINT_OPCODE_BNE => "BIGINT_BNE".to_string(), BIGINT_OPCODE_BLT => 
"BIGINT_BLT".to_string(), BIGINT_OPCODE_BLTU => "BIGINT_BLTU".to_string(), BIGINT_OPCODE_BGE => "BIGINT_BGE".to_string(), BIGINT_OPCODE_BGEU => "BIGINT_BGEU".to_string(), other => format!(""), } } ================================================ FILE: openvm-riscv/src/isa/mod.rs ================================================ use std::collections::{BTreeSet, HashSet}; use openvm_circuit::arch::{AirInventory, ChipInventoryError, VmBuilder}; use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, VmOpcode}; use openvm_stark_backend::p3_field::PrimeField32; use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Engine; #[cfg(feature = "cuda")] use powdr_openvm::{ isa::OriginalGpuChipComplex, powdr_extension::trace_generator::SharedPeripheryChipsGpu, }; use powdr_openvm::{ isa::{OpenVmISA, OriginalCpuChipComplex}, powdr_extension::trace_generator::cpu::SharedPeripheryChipsCpu, program::OriginalCompiledProgram, BabyBearSC, SpecializedExecutor, }; use powdr_riscv_elf::{debug_info::SymbolTable, ElfProgram}; use serde::{Deserialize, Serialize}; #[cfg(feature = "cuda")] use crate::ExtendedVmConfigGpuBuilder; use crate::{ isa::{ opcode::{branch_opcodes_bigint_set, branch_opcodes_set, instruction_allowlist}, trace_generator::{create_dummy_airs, create_dummy_chip_complex_cpu}, }, ExtendedVmConfig, ExtendedVmConfigCpuBuilder, ExtendedVmConfigExecutor, }; pub mod instruction_formatter; pub mod opcode; pub mod symbolic_instruction_builder; /// The trace generator for the powdr instructions pub mod trace_generator; // Clone should not be required #[derive(Clone, Default)] pub struct RiscvISA; /// A type to represent register addresses during execution #[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct OpenVmRegisterAddress(u8); // This seems trivial but it's tricky to put into powdr-openvm because of some From implementation issues. 
impl From> for SpecializedExecutor { fn from(value: ExtendedVmConfigExecutor) -> Self { Self::OriginalExecutor(value) } } impl OpenVmISA for RiscvISA { type Executor = ExtendedVmConfigExecutor; type Config = ExtendedVmConfig; type CpuBuilder = ExtendedVmConfigCpuBuilder; #[cfg(feature = "cuda")] type GpuBuilder = ExtendedVmConfigGpuBuilder; fn branching_opcodes() -> HashSet { branch_opcodes_set() } fn format(instruction: &Instruction) -> String { instruction_formatter::openvm_instruction_formatter(instruction) } fn allowed_opcodes() -> HashSet { instruction_allowlist() } fn create_original_chip_complex( config: &Self::Config, airs: AirInventory, ) -> Result { >::create_chip_complex( &ExtendedVmConfigCpuBuilder, config, airs, ) } type LinkedProgram<'a> = ElfProgram; fn get_symbol_table<'a>(program: &Self::LinkedProgram<'a>) -> SymbolTable { let debug_info = program.debug_info(); let labels = SymbolTable::from_table( debug_info .symbols .table() .iter() .map(|(addr, names)| { ( *addr, names .iter() .map(|name| rustc_demangle::demangle(name).to_string()) .collect(), ) }) .collect(), ); labels } fn get_jump_destinations(program: &OriginalCompiledProgram) -> BTreeSet { let labels = program.linked_program.text_labels(); let jump_dest = add_extra_targets(program, labels.clone(), DEFAULT_PC_STEP); jump_dest.into_iter().map(Into::into).collect() } fn create_dummy_airs>( config: &Self::Config, shared_chips: E, ) -> Result, openvm_circuit::arch::AirInventoryError> { create_dummy_airs(config, shared_chips) } fn create_dummy_chip_complex_cpu( config: &Self::Config, circuit: AirInventory, shared_chips: SharedPeripheryChipsCpu, ) -> Result { create_dummy_chip_complex_cpu(config, circuit, shared_chips) } #[cfg(feature = "cuda")] fn create_dummy_chip_complex_gpu( config: &Self::Config, circuit: AirInventory, shared_chips: SharedPeripheryChipsGpu, ) -> Result { use crate::isa::trace_generator::create_dummy_chip_complex_gpu; create_dummy_chip_complex_gpu(config, circuit, 
shared_chips) } } /// Besides the base RISC-V branching instructions, the bigint extension adds two more branching /// instruction classes over BranchEqual and BranchLessThan. /// Those instructions have the form , where target_offset is the /// relative jump we're interested in. /// This means that for a given program address A containing the instruction above, /// we add A + target_offset as a target as well. fn add_extra_targets( compiled_program: &OriginalCompiledProgram, mut labels: BTreeSet, pc_step: u32, ) -> BTreeSet { let branch_opcodes_bigint = branch_opcodes_bigint_set(); let program = &compiled_program.exe.program; let new_labels = program .instructions_and_debug_infos .iter() .enumerate() .filter_map(|(i, instr)| { let instr = instr.as_ref().unwrap().0.clone(); let adjusted_pc = program.pc_base + (i as u32) * pc_step; let op = instr.opcode; branch_opcodes_bigint .contains(&op) .then_some(adjusted_pc + instr.c.as_canonical_u32()) }); labels.extend(new_labels); labels } ================================================ FILE: openvm-riscv/src/isa/opcode.rs ================================================ use std::collections::HashSet; use openvm_bigint_transpiler::{Rv32BranchEqual256Opcode, Rv32BranchLessThan256Opcode}; use openvm_instructions::{LocalOpcode, VmOpcode}; use openvm_rv32im_transpiler::*; /// Defines each opcode as a `pub const usize` and also generates /// a `pub const ALL_OPCODES: &[usize]` containing all of them. macro_rules! define_opcodes { ( // Non-bigint opcodes // e.g. OPCODE_BEQ = BranchEqualOpcode::BEQ as usize + BranchEqualOpcode::CLASS_OFFSET $( $non_big_int_name:ident = $ty:ident :: $variant:ident, )* ; // Intentional pattern split delimiter // Bigint opcodes // e.g. 
// BIGINT_OPCODE_BEQ = BranchEqualOpcode::BEQ as usize + Rv32BranchEqual256Opcode::CLASS_OFFSET
    $( $bigint_name:ident = $big_ty:ident ; $small_ty:ident :: $small_variant:ident, )*
    ) => {
        // Non-bigint opcodes: the variant's own discriminant plus the class offset
        // of the *same* opcode type.
        $(
            pub const $non_big_int_name: usize = ( $ty::$variant as usize + < $ty as LocalOpcode >::CLASS_OFFSET ) as usize;
        )*
        // Bigint opcodes: reuse the discriminant of the corresponding *small* (base)
        // opcode, but shift it by the *bigint* wrapper type's class offset.
        $(
            pub const $bigint_name: usize = ( $small_ty::$small_variant as usize + < $big_ty as LocalOpcode >::CLASS_OFFSET ) as usize;
        )*
        /// All opcodes in one slice.
        pub const ALL_OPCODES: &[usize] = &[ $( $non_big_int_name, )* $( $bigint_name, )* ];
        /// All opcodes except bigint in one slice.
        pub const ALL_OPCODES_EXCEPT_BIGINT: &[usize] = &[ $( $non_big_int_name, )* ];
    }
}

define_opcodes!(
    // Rv32BaseAluChip
    OPCODE_ADD = BaseAluOpcode::ADD,
    OPCODE_SUB = BaseAluOpcode::SUB,
    OPCODE_XOR = BaseAluOpcode::XOR,
    OPCODE_OR = BaseAluOpcode::OR,
    OPCODE_AND = BaseAluOpcode::AND,
    // Rv32ShiftChip opcodes
    OPCODE_SLL = ShiftOpcode::SLL,
    OPCODE_SRL = ShiftOpcode::SRL,
    OPCODE_SRA = ShiftOpcode::SRA,
    // Rv32LessThanChip opcodes
    OPCODE_SLT = LessThanOpcode::SLT,
    OPCODE_SLTU = LessThanOpcode::SLTU,
    // Load/Store opcodes
    OPCODE_LOADW = Rv32LoadStoreOpcode::LOADW,
    OPCODE_LOADBU = Rv32LoadStoreOpcode::LOADBU,
    OPCODE_LOADHU = Rv32LoadStoreOpcode::LOADHU,
    OPCODE_STOREW = Rv32LoadStoreOpcode::STOREW,
    OPCODE_STOREH = Rv32LoadStoreOpcode::STOREH,
    OPCODE_STOREB = Rv32LoadStoreOpcode::STOREB,
    OPCODE_LOADB = Rv32LoadStoreOpcode::LOADB,
    OPCODE_LOADH = Rv32LoadStoreOpcode::LOADH,
    // Other opcodes
    OPCODE_BEQ = BranchEqualOpcode::BEQ,
    OPCODE_BNE = BranchEqualOpcode::BNE,
    OPCODE_BLT = BranchLessThanOpcode::BLT,
    OPCODE_BLTU = BranchLessThanOpcode::BLTU,
    OPCODE_BGE = BranchLessThanOpcode::BGE,
    OPCODE_BGEU = BranchLessThanOpcode::BGEU,
    OPCODE_JAL = Rv32JalLuiOpcode::JAL,
    OPCODE_LUI = Rv32JalLuiOpcode::LUI,
    OPCODE_JALR = Rv32JalrOpcode::JALR,
    OPCODE_AUIPC = Rv32AuipcOpcode::AUIPC,
    OPCODE_MUL = MulOpcode::MUL,
    OPCODE_MULH = MulHOpcode::MULH,
    OPCODE_MULHSU = MulHOpcode::MULHSU,
    OPCODE_MULHU = MulHOpcode::MULHU,
OPCODE_DIV = DivRemOpcode::DIV, OPCODE_DIVU = DivRemOpcode::DIVU, OPCODE_REM = DivRemOpcode::REM, OPCODE_REMU = DivRemOpcode::REMU, OPCODE_HINT_STOREW = Rv32HintStoreOpcode::HINT_STOREW, OPCODE_HINT_BUFFER = Rv32HintStoreOpcode::HINT_BUFFER, ; // Intentional pattern split delimiter // Bigint opcodes BIGINT_OPCODE_BEQ = Rv32BranchEqual256Opcode; BranchEqualOpcode::BEQ, BIGINT_OPCODE_BNE = Rv32BranchEqual256Opcode; BranchEqualOpcode::BNE, BIGINT_OPCODE_BLT = Rv32BranchLessThan256Opcode; BranchLessThanOpcode::BLT, BIGINT_OPCODE_BLTU = Rv32BranchLessThan256Opcode; BranchLessThanOpcode::BLTU, BIGINT_OPCODE_BGE = Rv32BranchLessThan256Opcode; BranchLessThanOpcode::BGE, BIGINT_OPCODE_BGEU = Rv32BranchLessThan256Opcode; BranchLessThanOpcode::BGEU, ); pub const BRANCH_OPCODES_BIGINT: &[usize] = &[ BIGINT_OPCODE_BEQ, BIGINT_OPCODE_BNE, BIGINT_OPCODE_BLT, BIGINT_OPCODE_BLTU, BIGINT_OPCODE_BGE, BIGINT_OPCODE_BGEU, ]; pub const BRANCH_OPCODES: &[usize] = &[ OPCODE_BEQ, OPCODE_BNE, OPCODE_BLT, OPCODE_BLTU, OPCODE_BGE, OPCODE_BGEU, OPCODE_JAL, OPCODE_JALR, ]; // Allowed opcodes = ALL_OPCODES_EXCEPT_BIGINT - HINT_STOREW - HINT_BUFFER pub fn instruction_allowlist() -> HashSet { // Filter out HINT_STOREW and HINT_BUFFER, which contain next references that don't work with apc ALL_OPCODES_EXCEPT_BIGINT .iter() .copied() .filter(|&op| op != OPCODE_HINT_BUFFER && op != OPCODE_HINT_STOREW) .map(VmOpcode::from_usize) .collect() } pub fn branch_opcodes_bigint_set() -> HashSet { let mut set = HashSet::new(); set.extend( BRANCH_OPCODES_BIGINT .iter() .cloned() .map(VmOpcode::from_usize), ); set } pub fn branch_opcodes_set() -> HashSet { let mut set = branch_opcodes_bigint_set(); set.extend(BRANCH_OPCODES.iter().cloned().map(VmOpcode::from_usize)); set } #[cfg(test)] mod tests { use super::*; #[test] fn test_all_opcodes() { let expected = &[ 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 533, 534, 535, 544, 545, 549, 550, 551, 552, 560, 561, 565, 576, 592, 593, 594, 
595, 596, 597, 598, 599, 608, 609, 1056, 1057, 1061, 1062, 1063, 1064, ]; assert_eq!(ALL_OPCODES.len(), 44); // 38 non-bigint + 6 bigint assert_eq!(ALL_OPCODES, expected); } #[test] fn test_all_opcodes_except_bigint() { let expected = &[ 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 533, 534, 535, 544, 545, 549, 550, 551, 552, 560, 561, 565, 576, 592, 593, 594, 595, 596, 597, 598, 599, 608, 609, ]; assert_eq!(ALL_OPCODES_EXCEPT_BIGINT.len(), 38); // 38 non-bigint assert_eq!(ALL_OPCODES_EXCEPT_BIGINT, expected); } #[test] fn test_instruction_allowlist() { let allowlist = instruction_allowlist(); let expected = [ 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 528, 529, 530, 531, 532, 533, 534, 535, 544, 545, 549, 550, 551, 552, 560, 561, 565, 576, 592, 593, 594, 595, 596, 597, 598, 599, ] .into_iter() .map(VmOpcode::from_usize) .collect(); assert_eq!(allowlist.len(), ALL_OPCODES_EXCEPT_BIGINT.len() - 2); // Excluding HINT_STOREW and HINT_BUFFER assert_eq!(allowlist, expected); } } ================================================ FILE: openvm-riscv/src/isa/symbolic_instruction_builder.rs ================================================ //! Builds Instruction to create input program for testing powdr_autoprecompile::build use super::opcode::*; use openvm_instructions::{instruction::Instruction, VmOpcode}; use openvm_stark_backend::p3_field::PrimeField32; // Generic instructions (5 args, fixed f=0, g=0) macro_rules! build_instr5 { ( $( $(#[$doc:meta])* ($name:ident, $code:expr) ),+ $(,)? ) => { $( $(#[$doc])* pub fn $name( a: u32, b: u32, c: u32, d: u32, e: u32, ) -> Instruction { Instruction { opcode: VmOpcode::from_usize($code as usize), a: T::from_canonical_u32(a), b: T::from_canonical_u32(b), c: T::from_canonical_u32(c), d: T::from_canonical_u32(d), e: T::from_canonical_u32(e), f: T::ZERO, g: T::ZERO, } } )+ }; } // ALU instructions (4 args, fixed d=1, f=0, g=0) macro_rules! 
alu_ops { ( $( $(#[$doc:meta])* ($name:ident, $code:expr) ),+ $(,)? ) => { $( $(#[$doc])* pub fn $name( rd_ptr: u32, rs1_ptr: u32, rs2: u32, rs2_as: u32, ) -> Instruction { Instruction { opcode: VmOpcode::from_usize($code as usize), a: T::from_canonical_u32(rd_ptr), b: T::from_canonical_u32(rs1_ptr), c: T::from_canonical_u32(rs2), d: T::ONE, e: T::from_canonical_u32(rs2_as), f: T::ZERO, g: T::ZERO, } } )+ }; } // Load/Store and Load/Store Sign Extend instructions (6 args, fixed d=1) macro_rules! ls_ops { ( $( $(#[$doc:meta])* ($name:ident, $code:expr) ),+ $(,)? ) => { $( $(#[$doc])* pub fn $name( rd_rs2_ptr: u32, rs1_ptr: u32, imm: u32, mem_as: u32, needs_write: u32, imm_sign: u32, ) -> Instruction { Instruction { opcode: VmOpcode::from_usize($code as usize), a: T::from_canonical_u32(rd_rs2_ptr), b: T::from_canonical_u32(rs1_ptr), c: T::from_canonical_u32(imm), d: T::ONE, e: T::from_canonical_u32(mem_as), f: T::from_canonical_u32(needs_write), g: T::from_canonical_u32(imm_sign), } } )+ }; } // Branch Lt and Branch Eq instructions (3 args, fixed d=1, e=1, f=0, g=0) macro_rules! branch_ops { ( $( $(#[$doc:meta])* ($name:ident, $code:expr) ),+ $(,)? 
) => { $( $(#[$doc])* pub fn $name( rs1_ptr: u32, rs2_ptr: u32, imm: i32, ) -> Instruction { let imm = if imm >= 0 { T::from_canonical_u32(imm as u32) } else { -T::from_canonical_u32((-imm) as u32) }; Instruction { opcode: VmOpcode::from_usize($code as usize), a: T::from_canonical_u32(rs1_ptr), b: T::from_canonical_u32(rs2_ptr), c: imm, d: T::ONE, e: T::ONE, f: T::ZERO, g: T::ZERO, } } )+ }; } // Generic instructions build_instr5!( /// Jump and link (Rdwrite adapter and JAL_LUI core): /// - to_pc = pc + imm /// - store(REG, rd_ptr, pc + 4) (jal, OPCODE_JAL), /// Load upper immediate (Rdwrite adapter and JAL_LUI core): /// - store(REG, rd_ptr, imm * 2^8) (lui, OPCODE_LUI), /// Jump and link register (JALR adapter and JALR core): /// - to_pc = load(REG, rs1_ptr) + imm /// - store(REG, rd_ptr, pc + 4) (jalr, OPCODE_JALR), /// Add upper immediate to PC (but does not change PC) (Rdwrite adapter and AUIPC core): /// - store(REG, rd_ptr, pc + imm * 2^8) (auipc, OPCODE_AUIPC), /// Multiplication (Mul adapter and Multiplication core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) * load(REG, rs2_ptr) % 2^32) (mul, OPCODE_MUL), /// Signed * signed multiplication high (Mul adapter and MULH core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) * load(REG, rs2_ptr) / 2^32), where `/` is integer division (mulh, OPCODE_MULH), /// Signed * unsigned multiplication high (Mul adapter and MULH core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) * load(REG, rs2_ptr) / 2^32), where `/` is integer division (mulhsu, OPCODE_MULHSU), /// Unsigned * unsigned multiplication high (Mul adapter and MULH core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) * load(REG, rs2_ptr) / 2^32), where `/` is integer division (mulhu, OPCODE_MULHU), /// Signed division (Mul adapter and Divrem core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) / load(REG, rs2_ptr)), where `/` is integer division /// - Exception: store(REG, rd_ptr, -1) if `load(REG, rs2_ptr) == 0` (div, OPCODE_DIV), /// Unsigned division (Mul adapter 
and Divrem core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) / load(REG, rs2_ptr)), where `/` is integer division /// - Exception: store(REG, rd_ptr, 2^32 - 1) if `load(REG, rs2_ptr) == 0` (divu, OPCODE_DIVU), /// Signed remainder (Mul adapter and Divrem core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) % load(REG, rs2_ptr)) (rem, OPCODE_REM), /// Unsigned remainder (Mul adapter and Divrem core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) % load(REG, rs2_ptr)) (remu, OPCODE_REMU), (hint_storew, OPCODE_HINT_STOREW), (hint_buffer, OPCODE_HINT_BUFFER) ); // ALU instructions alu_ops!( /// Addition (ALU adapter and ALU core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) + load(rs2_as, rs2)) (add, OPCODE_ADD), /// Subtraction (ALU adapter and ALU core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) - load(rs2_as, rs2)) (sub, OPCODE_SUB), /// XOR (ALU adapter and ALU core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) XOR load(rs2_as, rs2)) (xor, OPCODE_XOR), /// OR (ALU adapter and ALU core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) OR load(rs2_as, rs2)) (or, OPCODE_OR), /// AND (ALU adapter and ALU core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) AND load(rs2_as, rs2)) (and, OPCODE_AND), /// Shift left (ALU adapter and Shift core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) << (load(rs2_as, rs2) % 32)) (sll, OPCODE_SLL), /// Shift right (ALU adapter and Shift core): /// - store(REG, rd_ptr, load(REG, rs1_ptr) >> (load(rs2_as, rs2) % 32)) (srl, OPCODE_SRL), /// Shift right arithmetic (signed) (ALU adapter and Shift core): /// - store(REG, rd_ptr, sign_extend(load(REG, rs1_ptr) >> (load(rs2_as, rs2) % 32))) (sra, OPCODE_SRA), /// Less than signed (ALU adapter and Less than core): /// - store(REG, rd_ptr, 1 if load(REG, rs1_ptr) < load(rs2_as, rs2) else 0) (slt, OPCODE_SLT), /// Less than unsigned (ALU adapter and Less than core): /// - store(REG, rd_ptr, 1 if load(REG, rs1_ptr) < load(rs2_as, rs2) else 0) (sltu, OPCODE_SLTU) ); // Load/Store and Load/Store Sign 
Extend instructions ls_ops!( /// Load word (Load/store adapter and Load sign extend core): /// - store(REG, rd_ptr, load(mem_as, val(rs1) + imm)), where val(rs1) = load(REG, rs1_ptr) (loadw, OPCODE_LOADW), /// Load byte unsigned (Load/store adapter and Load sign extend core): /// - store(REG, rd_ptr, load_byte_unsigned(mem_as, val(rs1) + imm)), where val(rs1) = load(REG, rs1_ptr) (loadbu, OPCODE_LOADBU), /// Load half-word unsigned (Load/store adapter and Load sign extend core): /// - store(REG, rd_ptr, load_half_word_unsigned(mem_as, val(rs1) + imm)), where val(rs1) = load(REG, rs1_ptr) (loadhu, OPCODE_LOADHU), /// Store word (Load/store adapter and Loadstore core): /// - store(mem_as, val(rs1) + imm, load(REG, rd_ptr)), where val(rs1) = load(REG, rs1_ptr) (storew, OPCODE_STOREW), /// Store half-word (Load/store adapter and Loadstore core): /// - store_half_word(mem_as, val(rs1) + imm, load(REG, rd_ptr)), where val(rs1) = load(REG, rs1_ptr) (storeh, OPCODE_STOREH), /// Store byte (Load/store adapter and Loadstore core): /// - store_byte(mem_as, val(rs1) + imm, load(REG, rd_ptr)), where val(rs1) = load(REG, rs1_ptr) (storeb, OPCODE_STOREB), /// Load byte signed (Load/store adapter and Load sign extend core): /// - store(REG, rd_ptr, load_byte_signed(mem_as, val(rs1) + imm)), where val(rs1) = load(REG, rs1_ptr) (loadb, OPCODE_LOADB), /// Load half-word signed (Load/store adapter and Load sign extend core): /// - store(REG, rd_ptr, load_half_word_signed(mem_as, val(rs1) + imm)), where val(rs1) = load(REG, rs1_ptr) (loadh, OPCODE_LOADH) ); // Branch Eq and Branch Lt instructions branch_ops!( /// Branch equal (Branch adapter and Branch Eq core): /// - to_pc = pc + imm if load(REG, rs1_ptr) == load(REG, rs2_ptr) else pc + 4 (beq, OPCODE_BEQ), /// Branch not equal (Branch adapter and Branch Eq core): /// - to_pc = pc + imm if load(REG, rs1_ptr) != load(REG, rs2_ptr) else pc + 4 (bne, OPCODE_BNE), /// Branch less than signed (Branch adapter and Branch Lt core): /// - 
to_pc = pc + imm if load(REG, rs1_ptr) < load(REG, rs2_ptr) else pc + 4 (blt, OPCODE_BLT), /// Branch less than unsigned (Branch adapter and Branch Lt core): /// - to_pc = pc + imm if load(REG, rs1_ptr) < load(REG, rs2_ptr) else pc + 4 (bltu, OPCODE_BLTU), /// Branch greater than or equal signed (Branch adapter and Branch Lt core): /// - to_pc = pc + imm if load(REG, rs1_ptr) >= load(REG, rs2_ptr) else pc + 4 (bge, OPCODE_BGE), /// Branch greater than or equal unsigned (Branch adapter and Branch Lt core): /// - to_pc = pc + imm if load(REG, rs1_ptr) >= load(REG, rs2_ptr) else pc + 4 (bgeu, OPCODE_BGEU), ); ================================================ FILE: openvm-riscv/src/isa/trace_generator/common.rs ================================================ use openvm_circuit::arch::{AirInventory, AirInventoryError, VmCircuitConfig, VmCircuitExtension}; use powdr_openvm::BabyBearSC; use crate::ExtendedVmConfig; pub fn create_dummy_airs>( config: &ExtendedVmConfig, shared_chips: E, ) -> Result, AirInventoryError> { let config = config.sdk.to_inner(); let mut inventory = config.system.create_airs()?; // CHANGE: add dummy periphery inventory.start_new_extension(); VmCircuitExtension::extend_circuit(&shared_chips, &mut inventory)?; // END CHANGE if let Some(rv32i) = &config.rv32i { VmCircuitExtension::extend_circuit(rv32i, &mut inventory)?; } if let Some(io) = &config.io { VmCircuitExtension::extend_circuit(io, &mut inventory)?; } if let Some(keccak) = &config.keccak { VmCircuitExtension::extend_circuit(keccak, &mut inventory)?; } if let Some(sha256) = &config.sha256 { VmCircuitExtension::extend_circuit(sha256, &mut inventory)?; } if let Some(native) = &config.native { VmCircuitExtension::extend_circuit(native, &mut inventory)?; } if let Some(castf) = &config.castf { VmCircuitExtension::extend_circuit(castf, &mut inventory)?; } if let Some(rv32m) = &config.rv32m { VmCircuitExtension::extend_circuit(rv32m, &mut inventory)?; } if let Some(bigint) = &config.bigint { 
VmCircuitExtension::extend_circuit(bigint, &mut inventory)?; } if let Some(modular) = &config.modular { VmCircuitExtension::extend_circuit(modular, &mut inventory)?; } if let Some(fp2) = &config.fp2 { VmCircuitExtension::extend_circuit(fp2, &mut inventory)?; } if let Some(pairing) = &config.pairing { VmCircuitExtension::extend_circuit(pairing, &mut inventory)?; } if let Some(ecc) = &config.ecc { VmCircuitExtension::extend_circuit(ecc, &mut inventory)?; } Ok(inventory) } ================================================ FILE: openvm-riscv/src/isa/trace_generator/cpu.rs ================================================ use openvm_algebra_circuit::AlgebraCpuProverExt; use openvm_bigint_circuit::Int256CpuProverExt; use openvm_circuit::arch::{AirInventory, ChipInventoryError, VmBuilder, VmProverExtension}; use openvm_circuit::system::SystemCpuBuilder; use openvm_keccak256_circuit::Keccak256CpuProverExt; use openvm_native_circuit::NativeCpuProverExt; use openvm_pairing_circuit::PairingProverExt; use openvm_rv32im_circuit::Rv32ImCpuProverExt; use openvm_sha256_circuit::Sha2CpuProverExt; use powdr_openvm::powdr_extension::trace_generator::cpu::SharedPeripheryChipsCpuProverExt; use powdr_openvm::powdr_extension::trace_generator::{DummyChipComplex, SharedPeripheryChipsCpu}; use powdr_openvm::BabyBearSC; use crate::{ExtendedVmConfig, RiscvISA}; use openvm_ecc_circuit::EccCpuProverExt; use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Engine; pub fn create_dummy_chip_complex_cpu( config: &ExtendedVmConfig, circuit: AirInventory, shared_chips: SharedPeripheryChipsCpu, ) -> Result, ChipInventoryError> { let config = config.sdk.to_inner(); let mut chip_complex = VmBuilder::::create_chip_complex( &SystemCpuBuilder, &config.system, circuit, )?; let inventory = &mut chip_complex.inventory; // CHANGE: inject the periphery chips so that they are not created by the extensions. 
This is done for memory footprint: the dummy periphery chips are thrown away anyway, so we reuse a single one for all APCs. VmProverExtension::::extend_prover( &SharedPeripheryChipsCpuProverExt, &shared_chips, inventory, )?; // END CHANGE if let Some(rv32i) = &config.rv32i { VmProverExtension::::extend_prover( &Rv32ImCpuProverExt, rv32i, inventory, )?; } if let Some(io) = &config.io { VmProverExtension::::extend_prover( &Rv32ImCpuProverExt, io, inventory, )?; } if let Some(keccak) = &config.keccak { VmProverExtension::::extend_prover( &Keccak256CpuProverExt, keccak, inventory, )?; } if let Some(sha256) = &config.sha256 { VmProverExtension::::extend_prover( &Sha2CpuProverExt, sha256, inventory, )?; } if let Some(native) = &config.native { VmProverExtension::::extend_prover( &NativeCpuProverExt, native, inventory, )?; } if let Some(castf) = &config.castf { VmProverExtension::::extend_prover( &NativeCpuProverExt, castf, inventory, )?; } if let Some(rv32m) = &config.rv32m { VmProverExtension::::extend_prover( &Rv32ImCpuProverExt, rv32m, inventory, )?; } if let Some(bigint) = &config.bigint { VmProverExtension::::extend_prover( &Int256CpuProverExt, bigint, inventory, )?; } if let Some(modular) = &config.modular { VmProverExtension::::extend_prover( &AlgebraCpuProverExt, modular, inventory, )?; } if let Some(fp2) = &config.fp2 { VmProverExtension::::extend_prover( &AlgebraCpuProverExt, fp2, inventory, )?; } if let Some(pairing) = &config.pairing { VmProverExtension::::extend_prover( &PairingProverExt, pairing, inventory, )?; } if let Some(ecc) = &config.ecc { VmProverExtension::::extend_prover( &EccCpuProverExt, ecc, inventory, )?; } Ok(chip_complex) } ================================================ FILE: openvm-riscv/src/isa/trace_generator/cuda.rs ================================================ use openvm_circuit::{ arch::{AirInventory, ChipInventoryError, VmBuilder, VmProverExtension}, system::cuda::extensions::SystemGpuBuilder, }; use 
openvm_pairing_circuit::PairingProverExt; use powdr_openvm::{ powdr_extension::trace_generator::cuda::{ GpuDummyChipComplex, SharedPeripheryChipsGpu, SharedPeripheryChipsGpuProverExt, }, BabyBearSC, GpuBabyBearPoseidon2Engine, }; use crate::{ExtendedVmConfig, RiscvISA}; pub fn create_dummy_chip_complex_gpu( config: &ExtendedVmConfig, circuit: AirInventory, shared_chips: SharedPeripheryChipsGpu, ) -> Result, ChipInventoryError> { use openvm_algebra_circuit::AlgebraProverExt; use openvm_bigint_circuit::Int256GpuProverExt; use openvm_ecc_circuit::EccProverExt; use openvm_keccak256_circuit::Keccak256GpuProverExt; use openvm_native_circuit::NativeGpuProverExt; use openvm_rv32im_circuit::Rv32ImGpuProverExt; use openvm_sha256_circuit::Sha256GpuProverExt; type E = GpuBabyBearPoseidon2Engine; let config = config.sdk.to_inner(); let mut chip_complex = VmBuilder::::create_chip_complex(&SystemGpuBuilder, &config.system, circuit)?; let inventory = &mut chip_complex.inventory; // CHANGE: inject the periphery chips so that they are not created by the extensions. This is done for memory footprint: the dummy periphery chips are thrown away anyway, so we reuse a single one for all APCs. 
VmProverExtension::::extend_prover( &SharedPeripheryChipsGpuProverExt, &shared_chips, inventory, )?; // END CHANGE if let Some(rv32i) = &config.rv32i { VmProverExtension::::extend_prover(&Rv32ImGpuProverExt, rv32i, inventory)?; } if let Some(io) = &config.io { VmProverExtension::::extend_prover(&Rv32ImGpuProverExt, io, inventory)?; } if let Some(keccak) = &config.keccak { VmProverExtension::::extend_prover(&Keccak256GpuProverExt, keccak, inventory)?; } if let Some(sha256) = &config.sha256 { VmProverExtension::::extend_prover(&Sha256GpuProverExt, sha256, inventory)?; } if let Some(native) = &config.native { VmProverExtension::::extend_prover(&NativeGpuProverExt, native, inventory)?; } if let Some(castf) = &config.castf { VmProverExtension::::extend_prover(&NativeGpuProverExt, castf, inventory)?; } if let Some(rv32m) = &config.rv32m { VmProverExtension::::extend_prover(&Rv32ImGpuProverExt, rv32m, inventory)?; } if let Some(bigint) = &config.bigint { VmProverExtension::::extend_prover(&Int256GpuProverExt, bigint, inventory)?; } if let Some(modular) = &config.modular { VmProverExtension::::extend_prover(&AlgebraProverExt, modular, inventory)?; } if let Some(fp2) = &config.fp2 { VmProverExtension::::extend_prover(&AlgebraProverExt, fp2, inventory)?; } if let Some(pairing) = &config.pairing { VmProverExtension::::extend_prover(&PairingProverExt, pairing, inventory)?; } if let Some(ecc) = &config.ecc { VmProverExtension::::extend_prover(&EccProverExt, ecc, inventory)?; } Ok(chip_complex) } ================================================ FILE: openvm-riscv/src/isa/trace_generator/mod.rs ================================================ mod cpu; #[cfg(feature = "cuda")] mod cuda; mod common; pub use common::create_dummy_airs; pub use cpu::create_dummy_chip_complex_cpu; #[cfg(feature = "cuda")] pub use cuda::create_dummy_chip_complex_gpu; ================================================ FILE: openvm-riscv/src/lib.rs ================================================ 
#![cfg_attr(feature = "tco", allow(internal_features))] #![cfg_attr(feature = "tco", allow(incomplete_features))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] #![cfg_attr(feature = "tco", feature(core_intrinsics))] use eyre::Result; use openvm_build::{build_guest_package, find_unique_executable, get_package, TargetFilter}; use openvm_circuit::arch::execution_mode::metered::segment_ctx::SegmentationLimits; #[cfg(feature = "cuda")] use openvm_circuit::arch::DenseRecordArena; use openvm_circuit::arch::{ debug_proving_ctx, AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig, VmBuilder, VmChipComplex, VmProverExtension, }; #[cfg(feature = "cuda")] use openvm_circuit::system::cuda::SystemChipInventoryGPU; use openvm_circuit::system::SystemChipInventory; use openvm_sdk::config::SdkVmCpuBuilder; use openvm_sdk::config::TranspilerConfig; use openvm_sdk::prover::{verify_app_proof, AggStarkProver}; use openvm_sdk::{ config::{AppConfig, SdkVmConfig, SdkVmConfigExecutor, DEFAULT_APP_LOG_BLOWUP}, Sdk, StdIn, }; use openvm_stark_backend::config::Val; use openvm_stark_backend::engine::StarkEngine; use openvm_stark_backend::prover::cpu::{CpuBackend, CpuDevice}; use openvm_stark_sdk::config::FriParameters; use openvm_stark_sdk::p3_baby_bear::BabyBear; use openvm_transpiler::transpiler::Transpiler; use powdr_autoprecompiles::empirical_constraints::EmpiricalConstraints; use powdr_autoprecompiles::pgo::{CellPgo, InstructionPgo, NonePgo}; use powdr_autoprecompiles::PowdrConfig; use powdr_openvm::customize_exe::OpenVmApcCandidate; use powdr_openvm::extraction_utils::OriginalVmConfig; use powdr_openvm::trace_generation::do_with_trace; use powdr_openvm::BabyBearSC; #[cfg(not(feature = "cuda"))] use powdr_openvm::PowdrSdkCpu; #[cfg(feature = "cuda")] use powdr_openvm::{GpuBabyBearPoseidon2Engine, GpuBackend, PowdrSdkGpu}; use powdr_openvm_riscv_hints_circuit::{HintsExtension, HintsExtensionExecutor, HintsProverExt}; use 
powdr_openvm_riscv_hints_transpiler::HintsTranspilerExtension; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; pub use crate::isa::RiscvISA; pub use crate::isa::{instruction_formatter, symbolic_instruction_builder}; pub use powdr_openvm::program::{CompiledProgram, OriginalCompiledProgram}; pub mod isa; pub use powdr_autoprecompiles::DegreeBound; pub use powdr_autoprecompiles::PgoConfig; pub use powdr_openvm_bus_interaction_handler::bus_map; pub use powdr_openvm::empirical_constraints::detect_empirical_constraints; pub use powdr_openvm::{ default_powdr_openvm_config, DEFAULT_DEGREE_BOUND, DEFAULT_OPENVM_DEGREE_BOUND, }; pub use openvm_build::GuestOptions; pub use powdr_autoprecompiles::bus_map::BusType; pub use powdr_openvm::customize_exe::customize; pub use powdr_openvm::customize_exe::Instr; pub fn build_elf_path>( guest_opts: GuestOptions, pkg_dir: P, target_filter: &Option, ) -> Result { let pkg = get_package(pkg_dir.as_ref()); let target_dir = match build_guest_package(&pkg, &guest_opts, None, target_filter) { Ok(target_dir) => target_dir, Err(Some(code)) => { return Err(eyre::eyre!("Failed to build guest: code = {}", code)); } Err(None) => { return Err(eyre::eyre!( "Failed to build guest (OPENVM_SKIP_BUILD is set)" )); } }; find_unique_executable(pkg_dir, target_dir, target_filter) } // compile the original openvm program without powdr extension pub fn compile_openvm( guest: &str, guest_opts: GuestOptions, ) -> Result, Box> { // Build the ELF with guest options and a target filter. // We need these extra Rust flags to get the labels. 
let guest_opts = guest_opts.with_rustc_flags(vec!["-C", "link-arg=--emit-relocs"]); // Point to our local guest use std::path::PathBuf; let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf(); path.push(guest); let target_path = path.to_str().unwrap(); // try to load the sdk config from the openvm.toml file, otherwise use the default let openvm_toml_path = path.join("openvm.toml"); let app_config = if openvm_toml_path.exists() { let toml = std::fs::read_to_string(&openvm_toml_path)?; toml::from_str(&toml)? } else { AppConfig::riscv32() }; let mut sdk = Sdk::new(app_config)?; let transpiler = sdk.transpiler().unwrap(); // Add our custom transpiler extensions sdk.set_transpiler( transpiler .clone() .with_extension(HintsTranspilerExtension {}), ); let elf = sdk.build( guest_opts.clone(), target_path, &Default::default(), Default::default(), )?; // Transpile the ELF into a VmExe. let exe = sdk.convert_to_exe(elf)?; let elf_binary_path = build_elf_path(guest_opts.clone(), target_path, &Default::default())?; let elf = powdr_riscv_elf::load_elf(&elf_binary_path); let vm_config = ExtendedVmConfig { sdk: sdk.app_config().app_vm_config.clone(), hints: HintsExtension, }; Ok(OriginalCompiledProgram { exe, vm_config: OriginalVmConfig::new(vm_config), linked_program: elf, }) } pub fn compile_exe( original_program: OriginalCompiledProgram, config: PowdrConfig, pgo_config: PgoConfig, empirical_constraints: EmpiricalConstraints, ) -> Result, Box> { let compiled = match pgo_config { PgoConfig::Cell(pgo_data, max_total_columns) => { let max_total_apc_columns: Option = max_total_columns.map(|max_total_columns| { let original_config = original_program.vm_config.clone(); let total_non_apc_columns: usize = original_config .chip_inventory_air_metrics(config.degree_bound.identities) .values() .map(|m| m.total_width()) .sum::(); max_total_columns - total_non_apc_columns }); customize( original_program, config, CellPgo::<_, OpenVmApcCandidate>::with_pgo_data_and_max_columns( 
pgo_data,
                    max_total_apc_columns,
                ),
                empirical_constraints,
            )
        }
        PgoConfig::Instruction(pgo_data) => customize(
            original_program,
            config,
            InstructionPgo::with_pgo_data(pgo_data),
            empirical_constraints,
        ),
        PgoConfig::None => customize(
            original_program,
            config,
            NonePgo::default(),
            empirical_constraints,
        ),
    };
    Ok(compiled)
}

use openvm_circuit_derive::VmConfig;

// SdkVmConfig plus custom openvm extensions, before autoprecompile transformations.
// For now, only includes custom hints.
//
// NOTE(review): a generic argument inside the `executor = "…"` attribute string
// may have been lost in extraction — confirm upstream.
#[derive(Clone, Debug, Serialize, Deserialize, VmConfig)]
pub struct ExtendedVmConfig {
    #[config]
    pub sdk: SdkVmConfig,
    #[extension(executor = "HintsExtensionExecutor")]
    pub hints: HintsExtension,
}

// NOTE(review): the trait's generic argument was lost in extraction;
// `BabyBear` reconstructed from the imports — confirm upstream.
impl TranspilerConfig<BabyBear> for ExtendedVmConfig {
    fn transpiler(&self) -> Transpiler<BabyBear> {
        // The hints transpiler extension is added separately (see
        // `compile_openvm`); only the SDK transpiler is exposed here.
        self.sdk.transpiler()
    }
}

#[derive(Default, Clone)]
pub struct ExtendedVmConfigCpuBuilder;

// NOTE(review): the impl generics and associated-type arguments were lost in
// extraction; reconstructed from the imports (`BabyBearSC`, `Val`, `CpuBackend`,
// `CpuDevice`) and the openvm `VmBuilder` API — confirm upstream.
impl<E> VmBuilder<E> for ExtendedVmConfigCpuBuilder
where
    E: StarkEngine<SC = BabyBearSC, PB = CpuBackend<BabyBearSC>, PD = CpuDevice<BabyBearSC>>,
{
    type VmConfig = ExtendedVmConfig;
    type SystemChipInventory = SystemChipInventory<BabyBearSC>;
    type RecordArena = MatrixRecordArena<Val<BabyBearSC>>;

    fn create_chip_complex(
        &self,
        config: &ExtendedVmConfig,
        circuit: AirInventory<BabyBearSC>,
    ) -> Result<
        VmChipComplex<BabyBearSC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
        ChipInventoryError,
    > {
        // Build the SDK chip complex first, then layer the hints extension's
        // prover components on top of it.
        let mut chip_complex =
            VmBuilder::<E>::create_chip_complex(&SdkVmCpuBuilder, &config.sdk, circuit)?;
        let inventory = &mut chip_complex.inventory;
        VmProverExtension::<E, _, _>::extend_prover(&HintsProverExt, &config.hints, inventory)?;
        Ok(chip_complex)
    }
}

#[cfg(feature = "cuda")]
#[derive(Default, Clone)]
pub struct ExtendedVmConfigGpuBuilder;

// NOTE(review): the trait's generic argument was lost in extraction;
// `GpuBabyBearPoseidon2Engine` reconstructed from the cuda-gated imports —
// confirm upstream.
#[cfg(feature = "cuda")]
impl VmBuilder<GpuBabyBearPoseidon2Engine> for ExtendedVmConfigGpuBuilder {
    type VmConfig = ExtendedVmConfig;
    type SystemChipInventory = SystemChipInventoryGPU;
    type RecordArena = DenseRecordArena;

    fn create_chip_complex(
        &self,
        config: &ExtendedVmConfig,
        circuit: AirInventory<BabyBearSC>,
    ) -> Result<
        VmChipComplex<BabyBearSC, Self::RecordArena, GpuBackend, Self::SystemChipInventory>,
        ChipInventoryError,
    > {
        let mut chip_complex = VmBuilder::<GpuBabyBearPoseidon2Engine>::create_chip_complex(
            &openvm_sdk::config::SdkVmGpuBuilder,
            &config.sdk,
            circuit,
        )?;
        let inventory =
&mut chip_complex.inventory;
        // Register the hints extension's prover components on top of the
        // SDK-built chip complex.
        // NOTE(review): the turbofish arguments were lost in extraction;
        // reconstructed to mirror the CPU builder — confirm upstream.
        VmProverExtension::<GpuBabyBearPoseidon2Engine, _, _>::extend_prover(
            &HintsProverExt,
            &config.hints,
            inventory,
        )?;
        Ok(chip_complex)
    }
}

// Init-file generation is delegated wholesale to the inner SDK config.
impl InitFileGenerator for ExtendedVmConfig {
    // NOTE(review): the return type's generic argument was lost in extraction;
    // `Option<String>` reconstructed from the openvm `InitFileGenerator`
    // trait — confirm upstream.
    fn generate_init_file_contents(&self) -> Option<String> {
        self.sdk.generate_init_file_contents()
    }

    fn write_to_init_file(
        &self,
        manifest_dir: &Path,
        init_file_name: Option<&str>,
    ) -> std::io::Result<()> {
        self.sdk.write_to_init_file(manifest_dir, init_file_name)
    }
}

/// Prove the given compiled program.
///
/// * `mock` — run the debug prover only (constraint checking, no real proof).
/// * `recursion` — additionally run inner-recursion aggregation after the
///   app proof is verified.
/// * `segment_height` — overrides the default maximum trace height per
///   segment.
///
/// NOTE(review): the `Option` type argument was lost in extraction; `usize`
/// reconstructed from the `as u32` cast below — confirm upstream.
pub fn prove(
    program: &CompiledProgram,
    mock: bool,
    recursion: bool,
    inputs: StdIn,
    segment_height: Option<usize>, // uses the default height if None
) -> Result<(), Box<dyn std::error::Error>> {
    if mock {
        do_with_trace(program, inputs, |_segment_idx, vm, pk, ctx| {
            debug_proving_ctx(vm, pk, &ctx);
        })?;
    } else {
        let exe = &program.exe;
        let mut vm_config = program.vm_config.clone();
        // DefaultSegmentationStrategy { max_segment_len: 4194204, max_cells_per_chip_in_segment: 503304480 }
        if let Some(segment_height) = segment_height {
            vm_config
                .original
                .config_mut()
                .sdk
                .system
                .config
                .segmentation_limits =
                SegmentationLimits::default().with_max_trace_height(segment_height as u32);
            tracing::debug!("Setting max segment len to {}", segment_height);
        }
        // Set app configuration
        let app_fri_params =
            FriParameters::standard_with_100_bits_conjectured_security(DEFAULT_APP_LOG_BLOWUP);
        let app_config = AppConfig::new(app_fri_params, vm_config.clone());

        // Create the SDK (GPU-backed when the cuda feature is enabled)
        #[cfg(feature = "cuda")]
        let sdk = PowdrSdkGpu::new(app_config).unwrap();
        #[cfg(not(feature = "cuda"))]
        let sdk = PowdrSdkCpu::new(app_config).unwrap();

        let mut app_prover = sdk.app_prover(exe.clone())?;

        // Generate a proof
        tracing::info!("Generating app proof...");
        let start = std::time::Instant::now();
        let app_proof = app_prover.prove(inputs.clone())?;
        tracing::info!("App proof took {:?}", start.elapsed());
        tracing::info!("Public values: {:?}", app_proof.user_public_values);

        // Verify
        let app_vk = sdk.app_pk().get_app_vk();
        verify_app_proof(&app_vk, &app_proof)?;
        tracing::info!("App proof verification 
done.");

        if recursion {
            let mut agg_prover: AggStarkProver<_, _> = sdk.prover(exe.clone())?.agg_prover;
            // Note that this proof is not verified. We assume that any valid app proof
            // (verified above) also leads to a valid aggregation proof.
            // If this was not the case, it would be a completeness bug in OpenVM.
            let start = std::time::Instant::now();
            let _ = agg_prover.generate_root_verifier_input(app_proof)?;
            tracing::info!("Agg proof (inner recursion) took {:?}", start.elapsed());
        }
        tracing::info!("All done.");
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::{expect, Expect};
    use itertools::Itertools;
    use powdr_openvm::{
        execution_profile_from_guest,
        extraction_utils::{AirWidths, AirWidthsDiff},
        AirMetrics,
    };
    use pretty_assertions::assert_eq;
    use test_log::test;

    /// Compile `guest`, apply the APC transformation, and prove.
    ///
    /// NOTE(review): `Option` / `Result` type arguments in the signatures of
    /// this helper and the three wrappers below were lost in extraction;
    /// reconstructed as `Option<usize>` and
    /// `Result<(), Box<dyn std::error::Error>>` from `prove` — confirm
    /// upstream.
    #[allow(clippy::too_many_arguments)]
    fn compile_and_prove(
        guest: &str,
        config: PowdrConfig,
        mock: bool,
        recursion: bool,
        stdin: StdIn,
        pgo_config: PgoConfig,
        segment_height: Option<usize>,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let guest = compile_openvm(guest, GuestOptions::default()).unwrap();
        let program =
            compile_exe(guest, config, pgo_config, EmpiricalConstraints::default()).unwrap();
        prove(&program, mock, recursion, stdin, segment_height)
    }

    /// Real proof, no recursion.
    fn prove_simple(
        guest: &str,
        config: PowdrConfig,
        stdin: StdIn,
        pgo_config: PgoConfig,
        segment_height: Option<usize>,
    ) {
        compile_and_prove(
            guest,
            config,
            false,
            false,
            stdin,
            pgo_config,
            segment_height,
        )
        .unwrap()
    }

    /// Debug (mock) proof: constraint checking only.
    fn prove_mock(
        guest: &str,
        config: PowdrConfig,
        stdin: StdIn,
        pgo_config: PgoConfig,
        segment_height: Option<usize>,
    ) {
        compile_and_prove(
            guest,
            config,
            true,
            false,
            stdin,
            pgo_config,
            segment_height,
        )
        .unwrap()
    }

    /// Real proof plus inner-recursion aggregation.
    fn prove_recursion(
        guest: &str,
        config: PowdrConfig,
        stdin: StdIn,
        pgo_config: PgoConfig,
        segment_height: Option<usize>,
    ) {
        compile_and_prove(
            guest,
            config,
            false,
            true,
            stdin,
            pgo_config,
            segment_height,
        )
        .unwrap()
    }

    const GUEST: &str = "guest";
    const GUEST_ITER: u32 = 1 << 10;
    const GUEST_APC: u64 = 1;
    const GUEST_SKIP_NO_APC_EXECUTED: u64 = 56;
    const 
GUEST_SKIP_PGO: u64 = 0; const GUEST_KECCAK: &str = "guest-keccak"; const GUEST_KECCAK_ITER: u32 = 1_000; const GUEST_KECCAK_ITER_SMALL: u32 = 10; const GUEST_KECCAK_ITER_LARGE: u32 = 25_000; const GUEST_KECCAK_APC: u64 = 1; const GUEST_KECCAK_APC_PGO: u64 = 10; const GUEST_KECCAK_APC_PGO_LARGE: u64 = 100; const GUEST_KECCAK_SKIP: u64 = 0; const GUEST_SHA256_ITER: u32 = 1_000; const GUEST_SHA256_ITER_SMALL: u32 = 10; const GUEST_SHA256_ITER_LARGE: u32 = 25_000; const GUEST_SHA256: &str = "guest-sha256"; const GUEST_SHA256_APC_PGO: u64 = 10; const GUEST_SHA256_APC_PGO_LARGE: u64 = 50; const GUEST_SHA256_SKIP: u64 = 0; const GUEST_U256: &str = "guest-u256"; const GUEST_U256_APC_PGO: u64 = 10; const GUEST_U256_SKIP: u64 = 0; const GUEST_PAIRING: &str = "guest-pairing"; const GUEST_PAIRING_APC_PGO: u64 = 10; const GUEST_PAIRING_SKIP: u64 = 0; const GUEST_HINTS_TEST: &str = "guest-hints-test"; const GUEST_ECC_HINTS: &str = "guest-ecc-powdr-affine-hint"; const GUEST_ECC_APC_PGO: u64 = 50; const GUEST_ECC_SKIP: u64 = 0; // Even with an iteration of 0, the test does one linear combination // (and asserts that the result is correct) const GUEST_ECC_ITER: u32 = 0; const GUEST_ECC_PROJECTIVE: &str = "guest-ecc-projective"; const GUEST_ECC_PROJECTIVE_APC_PGO: u64 = 50; const GUEST_ECC_PROJECTIVE_SKIP: u64 = 0; const GUEST_ECRECOVER_HINTS: &str = "guest-ecrecover"; const GUEST_ECRECOVER_APC_PGO: u64 = 50; const GUEST_ECRECOVER_SKIP: u64 = 0; const GUEST_ECRECOVER_ITER: u32 = 1; #[test] fn guest_prove_simple_no_apc_executed() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ITER); // Create execution profile but don't prove with it, just to assert that the APC we select isn't executed let guest = compile_openvm(GUEST, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_APC, GUEST_SKIP_NO_APC_EXECUTED); let program = compile_exe( guest, config, PgoConfig::None, 
EmpiricalConstraints::default(), ) .unwrap(); // Assert that all APCs aren't executed program .vm_config .powdr .precompiles .iter() .for_each(|precompile| { assert!(!pgo_data .pc_count .keys() .contains(&precompile.apc.block.try_as_basic_block().unwrap().start_pc)); }); let result = prove(&program, false, false, stdin, None); assert!(result.is_ok()); } #[test] fn guest_prove_simple() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ITER); let config = default_powdr_openvm_config(GUEST_APC, GUEST_SKIP_PGO); let guest = compile_openvm(GUEST, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_simple(GUEST, config, stdin, PgoConfig::Instruction(pgo_data), None); } #[test] fn guest_prove_mock() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ITER); let config = default_powdr_openvm_config(GUEST_APC, GUEST_SKIP_PGO); let guest = compile_openvm(GUEST, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_mock(GUEST, config, stdin, PgoConfig::Instruction(pgo_data), None); } #[test] #[ignore = "Too much RAM"] fn guest_prove_recursion() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ITER); let config = default_powdr_openvm_config(GUEST_APC, GUEST_SKIP_PGO); let guest = compile_openvm(GUEST, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_recursion(GUEST, config, stdin, PgoConfig::Instruction(pgo_data), None); } #[test] #[ignore = "Too long"] fn matmul_compile() { let guest = compile_openvm("guest-matmul", GuestOptions::default()).unwrap(); let config = default_powdr_openvm_config(1, 0); assert!(compile_exe( guest, config, PgoConfig::default(), EmpiricalConstraints::default() ) .is_ok()); } #[test] fn keccak_small_prove_simple() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_KECCAK_APC, GUEST_KECCAK_SKIP); 
prove_simple(GUEST_KECCAK, config, stdin, PgoConfig::None, None); } #[test] fn keccak_small_prove_simple_multi_segment() { // Set the default segmentation height to a small value to test multi-segment proving let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_KECCAK_APC, GUEST_KECCAK_SKIP); // should create two segments prove_simple(GUEST_KECCAK, config, stdin, PgoConfig::None, Some(4_000)); } #[test] #[ignore = "Too long"] fn keccak_prove_simple() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER); let config = default_powdr_openvm_config(GUEST_KECCAK_APC, GUEST_KECCAK_SKIP); prove_simple(GUEST_KECCAK, config, stdin, PgoConfig::None, None); } #[test] #[ignore = "Too much RAM"] fn keccak_prove_many_apcs() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER); let guest = compile_openvm(GUEST_KECCAK, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_KECCAK_APC_PGO_LARGE, GUEST_KECCAK_SKIP); prove_recursion( GUEST_KECCAK, config.clone(), stdin.clone(), PgoConfig::Instruction(pgo_data.clone()), None, ); prove_recursion( GUEST_KECCAK, config.clone(), stdin, PgoConfig::Cell(pgo_data, None), None, ); } #[test] #[ignore = "Too much RAM"] fn keccak_prove_large() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER_LARGE); let guest = compile_openvm(GUEST_KECCAK, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_KECCAK_APC_PGO, GUEST_KECCAK_SKIP); prove_recursion( GUEST_KECCAK, config, stdin, PgoConfig::Instruction(pgo_data), None, ); } #[test] fn keccak_small_prove_mock() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_KECCAK_APC, GUEST_KECCAK_SKIP); prove_mock(GUEST_KECCAK, config, stdin, 
PgoConfig::None, None); } #[test] #[ignore = "Too long"] fn keccak_prove_mock() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER); let config = default_powdr_openvm_config(GUEST_KECCAK_APC, GUEST_KECCAK_SKIP); prove_mock(GUEST_KECCAK, config, stdin, PgoConfig::None, None); } // Create multiple APC for 10 Keccak iterations to test different PGO modes #[test] fn keccak_prove_multiple_pgo_modes() { use std::time::Instant; // Config let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_KECCAK_APC_PGO, GUEST_KECCAK_SKIP); // Pgo data let guest = compile_openvm(GUEST_KECCAK, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); // Pgo Cell mode let start = Instant::now(); prove_simple( GUEST_KECCAK, config.clone(), stdin.clone(), PgoConfig::Cell(pgo_data.clone(), None), None, ); let elapsed = start.elapsed(); tracing::debug!("Proving keccak with PgoConfig::Cell took {:?}", elapsed); // Pgo Instruction mode let start = Instant::now(); prove_simple( GUEST_KECCAK, config.clone(), stdin.clone(), PgoConfig::Instruction(pgo_data), None, ); let elapsed = start.elapsed(); tracing::debug!( "Proving keccak with PgoConfig::Instruction took {:?}", elapsed ); } #[test] #[ignore = "Too long"] fn sha256_prove_simple() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO, GUEST_SHA256_SKIP); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_simple( GUEST_SHA256, config, stdin, PgoConfig::Instruction(pgo_data), None, ); } #[test] #[ignore = "Too long"] fn sha256_prove_mock() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO, GUEST_SHA256_SKIP); let guest = compile_openvm(GUEST_SHA256, 
GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_mock( GUEST_SHA256, config, stdin, PgoConfig::Instruction(pgo_data), None, ); } #[test] #[ignore = "Too much RAM"] fn sha256_prove_many_apcs() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO_LARGE, GUEST_SHA256_SKIP); prove_recursion( GUEST_SHA256, config.clone(), stdin.clone(), PgoConfig::Instruction(pgo_data.clone()), None, ); prove_recursion( GUEST_SHA256, config.clone(), stdin, PgoConfig::Cell(pgo_data, None), None, ); } #[test] #[ignore = "Too much RAM"] fn sha256_prove_large() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER_LARGE); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO, GUEST_SHA256_SKIP); prove_recursion( GUEST_SHA256, config, stdin, PgoConfig::Instruction(pgo_data), None, ); } #[test] fn sha256_small_prove_simple() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO, GUEST_SHA256_SKIP); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_simple( GUEST_SHA256, config, stdin, PgoConfig::Instruction(pgo_data), None, ); } #[test] fn sha256_small_prove_mock() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO, GUEST_SHA256_SKIP); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_mock( 
GUEST_SHA256, config, stdin, PgoConfig::Instruction(pgo_data), None, ); } #[test] fn sha256_prove_multiple_pgo_modes() { use std::time::Instant; let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER_SMALL); let config = default_powdr_openvm_config(GUEST_SHA256_APC_PGO, GUEST_SHA256_SKIP); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let start = Instant::now(); prove_simple( GUEST_SHA256, config.clone(), stdin.clone(), PgoConfig::Cell(pgo_data.clone(), None), None, ); let elapsed = start.elapsed(); tracing::debug!("Proving sha256 with PgoConfig::Cell took {:?}", elapsed); let start = Instant::now(); prove_simple( GUEST_SHA256, config.clone(), stdin.clone(), PgoConfig::Instruction(pgo_data), None, ); let elapsed = start.elapsed(); tracing::debug!( "Proving sha256 with PgoConfig::Instruction took {:?}", elapsed ); } #[test] #[ignore = "Too much RAM"] fn u256_prove_large() { use std::time::Instant; let stdin = StdIn::default(); let config = default_powdr_openvm_config(GUEST_U256_APC_PGO, GUEST_U256_SKIP); let guest = compile_openvm(GUEST_U256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let start = Instant::now(); prove_simple( GUEST_U256, config.clone(), stdin.clone(), PgoConfig::Cell(pgo_data.clone(), None), None, ); let elapsed = start.elapsed(); tracing::debug!("Proving U256 with PgoConfig::Cell took {:?}", elapsed); } #[test] #[ignore = "Too slow"] fn pairing_prove() { use std::time::Instant; let stdin = StdIn::default(); let config = default_powdr_openvm_config(GUEST_PAIRING_APC_PGO, GUEST_PAIRING_SKIP); let guest = compile_openvm(GUEST_PAIRING, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let start = Instant::now(); prove_simple( GUEST_PAIRING, config.clone(), stdin.clone(), PgoConfig::Cell(pgo_data.clone(), None), None, ); let elapsed = 
start.elapsed(); tracing::debug!( "Proving pairing guest with PgoConfig::Cell took {:?}", elapsed ); } #[test] /// check that the hints test guest compiles and proves successfully fn hints_test_prove() { let mut stdin = StdIn::default(); stdin.write(&GUEST_HINTS_TEST); let config = default_powdr_openvm_config(0, 0); prove_simple(GUEST_SHA256, config, stdin, PgoConfig::None, None); } #[test] fn ecc_hint_prove() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ECC_ITER); let guest = compile_openvm(GUEST_ECC_HINTS, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_ECC_APC_PGO, GUEST_ECC_SKIP); prove_simple( GUEST_ECC_HINTS, config.clone(), stdin.clone(), PgoConfig::Cell(pgo_data.clone(), None), None, ); } #[test] fn ecrecover_prove() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ECRECOVER_ITER); let guest = compile_openvm(GUEST_ECRECOVER_HINTS, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_ECRECOVER_APC_PGO, GUEST_ECRECOVER_SKIP); prove_simple( GUEST_ECRECOVER_HINTS, config.clone(), stdin.clone(), PgoConfig::Cell(pgo_data.clone(), None), None, ); } #[test] #[ignore = "Too much RAM"] fn ecc_hint_prove_recursion_large() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ECC_ITER); let guest = compile_openvm(GUEST_ECC_HINTS, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_ECC_APC_PGO, GUEST_ECC_SKIP); prove_recursion( GUEST_ECC_HINTS, config, stdin, PgoConfig::Cell(pgo_data, None), None, ); } #[test] #[ignore = "Too much RAM"] fn ecrecover_prove_recursion_large() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ECRECOVER_ITER); let guest = compile_openvm(GUEST_ECRECOVER_HINTS, GuestOptions::default()).unwrap(); let pgo_data = 
execution_profile_from_guest(&guest, stdin.clone()); let config = default_powdr_openvm_config(GUEST_ECRECOVER_APC_PGO, GUEST_ECRECOVER_SKIP); prove_recursion( GUEST_ECRECOVER_HINTS, config, stdin, PgoConfig::Cell(pgo_data, None), None, ); } #[test] fn ecc_projective_prove() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ECC_ITER); let config = default_powdr_openvm_config(GUEST_ECC_PROJECTIVE_APC_PGO, GUEST_ECC_PROJECTIVE_SKIP); let guest = compile_openvm(GUEST_ECC_PROJECTIVE, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin.clone()); prove_simple( GUEST_ECC_PROJECTIVE, config, stdin, PgoConfig::Cell(pgo_data, None), None, ); } #[test] #[ignore = "Too much RAM"] fn keccak_prove_recursion() { let mut stdin = StdIn::default(); stdin.write(&GUEST_KECCAK_ITER); let config = default_powdr_openvm_config(GUEST_KECCAK_APC, GUEST_KECCAK_SKIP); prove_recursion(GUEST_KECCAK, config, stdin, PgoConfig::None, None); } // The following are compilation tests only struct GuestTestConfig { pgo_config: PgoConfig, name: &'static str, apc: u64, skip: u64, } struct MachineTestMetrics { powdr_expected_sum: Expect, powdr_expected_machine_count: Expect, non_powdr_expected_sum: AirMetrics, non_powdr_expected_machine_count: usize, } fn test_machine_compilation( guest: GuestTestConfig, expected_metrics: MachineTestMetrics, expected_columns_saved: Option, ) { let apc_candidates_dir = tempfile::tempdir().unwrap(); let apc_candidates_dir_path = apc_candidates_dir.path(); let config = default_powdr_openvm_config(guest.apc, guest.skip) .with_apc_candidates_dir(apc_candidates_dir_path); let is_cell_pgo = matches!(guest.pgo_config, PgoConfig::Cell(_, _)); let max_degree = config.degree_bound.identities; let guest_program = compile_openvm(guest.name, GuestOptions::default()).unwrap(); let compiled_program = compile_exe( guest_program, config, guest.pgo_config, EmpiricalConstraints::default(), ) .unwrap(); let (powdr_air_metrics, 
non_powdr_air_metrics) = compiled_program.air_metrics(max_degree); expected_metrics.powdr_expected_sum.assert_debug_eq( &powdr_air_metrics .iter() .map(|(metrics, _)| metrics.clone()) .sum::(), ); expected_metrics .powdr_expected_machine_count .assert_debug_eq(&powdr_air_metrics.len()); assert_eq!( non_powdr_air_metrics.len(), expected_metrics.non_powdr_expected_machine_count ); assert_eq!( non_powdr_air_metrics.into_iter().sum::(), expected_metrics.non_powdr_expected_sum ); let columns_saved = is_cell_pgo.then(|| { // Test cells saved in Pgo::Cell powdr_air_metrics .into_iter() .map(|(_, columns_saved)| columns_saved.unwrap()) .sum::() }); assert_eq!(columns_saved.is_some(), expected_columns_saved.is_some()); if let Some(expected) = expected_columns_saved { expected.assert_debug_eq(&columns_saved.unwrap()); } let files = std::fs::read_dir(apc_candidates_dir_path) .unwrap() .filter_map(Result::ok) .map(|entry| { entry .path() .file_name() .unwrap() .to_string_lossy() .to_string() }) .collect_vec(); // Check that the snapshot json files are there. assert!( files .iter() .any(|filename| filename.starts_with("apc_candidate_") && filename.ends_with(".json")), "APC candidates snapshot JSON file not found" ); if is_cell_pgo { // In Cell PGO, check that the apc candidates were persisted to disk assert!( files.contains(&"apc_candidates.json".to_string()), "Candidates file not present." ); } else { assert!( !files.contains(&"apc_candidates.json".to_string()), "Candidates file present, but not expected." 
); } } const NON_POWDR_EXPECTED_MACHINE_COUNT: usize = 19; const NON_POWDR_EXPECTED_SUM: AirMetrics = AirMetrics { widths: AirWidths { preprocessed: 7, main: 798, log_up: 684, }, constraints: 604, bus_interactions: 253, }; #[test] fn guest_machine_pgo_modes() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ITER); let guest = compile_openvm(GUEST, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin); test_machine_compilation( GuestTestConfig { pgo_config: PgoConfig::Instruction(pgo_data.clone()), name: GUEST, apc: GUEST_APC, skip: GUEST_SKIP_PGO, }, MachineTestMetrics { powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 38, log_up: 56, }, constraints: 12, bus_interactions: 26, } "#]], powdr_expected_machine_count: expect![[r#" 1 "#]], non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM, non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT, }, None, ); test_machine_compilation( GuestTestConfig { pgo_config: PgoConfig::Cell(pgo_data, None), name: GUEST, apc: GUEST_APC, skip: GUEST_SKIP_PGO, }, MachineTestMetrics { powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 38, log_up: 56, }, constraints: 12, bus_interactions: 26, } "#]], powdr_expected_machine_count: expect![[r#" 1 "#]], non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM, non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT, }, Some(expect![[r#" AirWidthsDiff { before: AirWidths { preprocessed: 0, main: 170, log_up: 236, }, after: AirWidths { preprocessed: 0, main: 38, log_up: 56, }, } "#]]), ); } #[test] fn sha256_machine_pgo() { let mut stdin = StdIn::default(); stdin.write(&GUEST_SHA256_ITER_SMALL); let guest = compile_openvm(GUEST_SHA256, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin); test_machine_compilation( GuestTestConfig { pgo_config: PgoConfig::Instruction(pgo_data.clone()), name: GUEST_SHA256, apc: 
GUEST_SHA256_APC_PGO, skip: GUEST_SHA256_SKIP, }, MachineTestMetrics { powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 14254, log_up: 22752, }, constraints: 4279, bus_interactions: 11143, } "#]], powdr_expected_machine_count: expect![[r#" 10 "#]], non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM, non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT, }, None, ); test_machine_compilation( GuestTestConfig { pgo_config: PgoConfig::Cell(pgo_data, None), name: GUEST_SHA256, apc: GUEST_SHA256_APC_PGO, skip: GUEST_SHA256_SKIP, }, MachineTestMetrics { powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 14226, log_up: 22720, }, constraints: 4255, bus_interactions: 11133, } "#]], powdr_expected_machine_count: expect![[r#" 10 "#]], non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM, non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT, }, Some(expect![[r#" AirWidthsDiff { before: AirWidths { preprocessed: 0, main: 183410, log_up: 227144, }, after: AirWidths { preprocessed: 0, main: 14226, log_up: 22720, }, } "#]]), ); } #[test] fn ecc_hint_machine_pgo_cell() { let mut stdin = StdIn::default(); stdin.write(&GUEST_ECC_ITER); let guest = compile_openvm(GUEST_ECC_HINTS, GuestOptions::default()).unwrap(); let pgo_data = execution_profile_from_guest(&guest, stdin); test_machine_compilation( GuestTestConfig { pgo_config: PgoConfig::Cell(pgo_data, None), name: GUEST_ECC_HINTS, apc: GUEST_ECC_APC_PGO, skip: GUEST_ECC_SKIP, }, MachineTestMetrics { powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 17184, log_up: 27796, }, constraints: 8573, bus_interactions: 11892, } "#]], powdr_expected_machine_count: expect![[r#" 50 "#]], non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM, non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT, }, Some(expect![[r#" AirWidthsDiff { before: AirWidths { preprocessed: 0, main: 127688, log_up: 169860, }, after: 
// NOTE(review): this chunk begins inside the trailing `expect![..]` snapshot of a
// test whose definition starts above this excerpt; the fragment is kept verbatim.
AirWidths { preprocessed: 0, main: 17184, log_up: 27796, }, } "#]]),
    );
}

/// Compiles the ecrecover guest, profiles one execution, and checks the machine
/// metrics produced under cell-based PGO (`PgoConfig::Cell`) against snapshots.
#[test]
fn ecrecover_machine_pgo_cell() {
    let mut stdin = StdIn::default();
    stdin.write(&GUEST_ECRECOVER_ITER);
    let guest = compile_openvm(GUEST_ECRECOVER_HINTS, GuestOptions::default()).unwrap();
    // The execution profile drives the cell-based APC selection below.
    let pgo_data = execution_profile_from_guest(&guest, stdin);
    test_machine_compilation(
        GuestTestConfig {
            pgo_config: PgoConfig::Cell(pgo_data, None),
            name: GUEST_ECRECOVER_HINTS,
            apc: GUEST_ECRECOVER_APC_PGO,
            skip: GUEST_ECRECOVER_SKIP,
        },
        MachineTestMetrics {
            powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 19873, log_up: 30884, }, constraints: 10968, bus_interactions: 13423, } "#]],
            powdr_expected_machine_count: expect![[r#" 50 "#]],
            non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM,
            non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT,
        },
        // Expected total width reduction achieved by the APCs (before vs. after).
        Some(expect![[r#" AirWidthsDiff { before: AirWidths { preprocessed: 0, main: 150546, log_up: 198172, }, after: AirWidths { preprocessed: 0, main: 19873, log_up: 30884, }, } "#]]),
    );
}

/// Runs the Keccak guest through all three PGO modes (None, Instruction, Cell)
/// and checks that each yields the expected machine metrics.
#[test]
fn keccak_machine_pgo_modes() {
    let mut stdin = StdIn::default();
    stdin.write(&GUEST_KECCAK_ITER_SMALL);
    let guest = compile_openvm(GUEST_KECCAK, GuestOptions::default()).unwrap();
    let pgo_data = execution_profile_from_guest(&guest, stdin);
    // Mode 1: no PGO.
    test_machine_compilation(
        GuestTestConfig {
            pgo_config: PgoConfig::None,
            name: GUEST_KECCAK,
            apc: GUEST_KECCAK_APC,
            skip: GUEST_KECCAK_SKIP,
        },
        MachineTestMetrics {
            powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 2022, log_up: 3472, }, constraints: 187, bus_interactions: 1734, } "#]],
            powdr_expected_machine_count: expect![[r#" 1 "#]],
            non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM,
            non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT,
        },
        None,
    );
    // Mode 2: instruction-frequency PGO; for this guest the expected metrics
    // match the no-PGO case.
    test_machine_compilation(
        GuestTestConfig {
            pgo_config: PgoConfig::Instruction(pgo_data.clone()),
            name: GUEST_KECCAK,
            apc: GUEST_KECCAK_APC,
            skip: GUEST_KECCAK_SKIP,
        },
        MachineTestMetrics {
            powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 2022, log_up: 3472, }, constraints: 187, bus_interactions: 1734, } "#]],
            powdr_expected_machine_count: expect![[r#" 1 "#]],
            non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM,
            non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT,
        },
        None,
    );
    // Mode 3: cell-based PGO; additionally checks the width-reduction diff.
    test_machine_compilation(
        GuestTestConfig {
            pgo_config: PgoConfig::Cell(pgo_data, None),
            name: GUEST_KECCAK,
            apc: GUEST_KECCAK_APC,
            skip: GUEST_KECCAK_SKIP,
        },
        MachineTestMetrics {
            powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 2022, log_up: 3472, }, constraints: 187, bus_interactions: 1734, } "#]],
            powdr_expected_machine_count: expect![[r#" 1 "#]],
            non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM,
            non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT,
        },
        Some(expect![[r#" AirWidthsDiff { before: AirWidths { preprocessed: 0, main: 27521, log_up: 35156, }, after: AirWidths { preprocessed: 0, main: 2022, log_up: 3472, }, } "#]]),
    );
}

/// Cell-based PGO with a cap on the total column count
/// (`PgoConfig::Cell(_, Some(max))`); uses the larger APC selection.
#[test]
fn keccak_machine_cell_pgo_max_columns() {
    const MAX_TOTAL_COLUMNS: usize = 10_000;
    let mut stdin = StdIn::default();
    stdin.write(&GUEST_KECCAK_ITER_SMALL);
    let guest = compile_openvm(GUEST_KECCAK, GuestOptions::default()).unwrap();
    let pgo_data = execution_profile_from_guest(&guest, stdin.clone());
    test_machine_compilation(
        GuestTestConfig {
            pgo_config: PgoConfig::Cell(pgo_data, Some(MAX_TOTAL_COLUMNS)),
            name: GUEST_KECCAK,
            apc: GUEST_KECCAK_APC_PGO_LARGE,
            skip: GUEST_KECCAK_SKIP,
        },
        MachineTestMetrics {
            powdr_expected_sum: expect![[r#" AirMetrics { widths: AirWidths { preprocessed: 0, main: 3234, log_up: 5264, }, constraints: 571, bus_interactions: 2562, } "#]],
            powdr_expected_machine_count: expect![[r#" 22 "#]],
            non_powdr_expected_sum: NON_POWDR_EXPECTED_SUM,
            non_powdr_expected_machine_count: NON_POWDR_EXPECTED_MACHINE_COUNT,
        },
        Some(expect![[r#" AirWidthsDiff { before: AirWidths { preprocessed: 0, main: 32376, log_up: 41660, }, after: AirWidths { preprocessed: 0, main: 3234, log_up: 5264, }, } "#]]),
    );
    // TODO
    // // Assert that total columns don't exceed the initial limit set
    // let total_columns = (powdr_metrics_sum + NON_POWDR_EXPECTED_SUM).widths.total();
    // assert!(
    //     total_columns <= MAX_TOTAL_COLUMNS,
    //     "Total columns exceeded the limit: {total_columns} > {MAX_TOTAL_COLUMNS}"
    // );
}

mod extraction {
    use crate::{ExtendedVmConfig, RiscvISA, DEFAULT_OPENVM_DEGREE_BOUND};
    use openvm_algebra_circuit::{Fp2Extension, ModularExtension};
    use openvm_bigint_circuit::Int256;
    use openvm_circuit::arch::SystemConfig;
    use openvm_ecc_circuit::{WeierstrassExtension, SECP256K1_CONFIG};
    use openvm_pairing_circuit::{PairingCurve, PairingExtension};
    use openvm_rv32im_circuit::Rv32M;
    use openvm_sdk::config::SdkVmConfig;
    use powdr_openvm::extraction_utils::OriginalVmConfig;
    use powdr_openvm_riscv_hints_circuit::HintsExtension;

    /// Builds a fully-featured SDK VM config (all extensions used by the
    /// RISC-V guests) and checks that a bus map can be derived from it.
    #[test]
    fn test_get_bus_map() {
        let use_kzg_intrinsics = true;
        let system_config = SystemConfig::default()
            .with_continuations()
            .with_max_constraint_degree(DEFAULT_OPENVM_DEGREE_BOUND)
            .with_public_values(32);
        let int256 = Int256::default();
        let bn_config = PairingCurve::Bn254.curve_config();
        let bls_config = PairingCurve::Bls12_381.curve_config();
        let rv32m = Rv32M {
            range_tuple_checker_sizes: int256.range_tuple_checker_sizes,
        };
        // Base set: BN254 and secp256k1 moduli/scalars.
        let mut supported_moduli = vec![
            bn_config.modulus.clone(),
            bn_config.scalar.clone(),
            SECP256K1_CONFIG.modulus.clone(),
            SECP256K1_CONFIG.scalar.clone(),
        ];
        let mut supported_complex_moduli =
            vec![("Bn254Fp2".to_string(), bn_config.modulus.clone())];
        let mut supported_curves = vec![bn_config.clone(), SECP256K1_CONFIG.clone()];
        let mut supported_pairing_curves = vec![PairingCurve::Bn254];
        // Optionally extend everything with BLS12-381 for KZG intrinsics.
        if use_kzg_intrinsics {
            supported_moduli.push(bls_config.modulus.clone());
            supported_moduli.push(bls_config.scalar.clone());
            supported_complex_moduli
                .push(("Bls12_381Fp2".to_string(), bls_config.modulus.clone()));
            supported_curves.push(bls_config.clone());
supported_pairing_curves.push(PairingCurve::Bls12_381); } let sdk_vm_config = SdkVmConfig::builder() .system(system_config.into()) .rv32i(Default::default()) .rv32m(rv32m) .io(Default::default()) .keccak(Default::default()) .sha256(Default::default()) .bigint(int256) .modular(ModularExtension::new(supported_moduli)) .fp2(Fp2Extension::new(supported_complex_moduli)) .ecc(WeierstrassExtension::new(supported_curves)) .pairing(PairingExtension::new(supported_pairing_curves)) .build(); let _ = OriginalVmConfig::::new(ExtendedVmConfig { sdk: sdk_vm_config, hints: HintsExtension, }) .bus_map(); } } } ================================================ FILE: openvm-riscv/tests/apc_builder_complex.rs ================================================ mod common; use openvm_instructions::instruction::Instruction; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::blocks::BasicBlock; use powdr_openvm_riscv::symbolic_instruction_builder::*; use test_log::test; fn assert_machine_output(program: Vec>, test_name: &str) { let bb = BasicBlock { start_pc: 0, instructions: program, }; common::apc_builder_utils::assert_machine_output(bb.into(), "complex", test_name); } #[test] fn guest_top_block() { // Top block from `guest` with `--pgo cell`, with 4 instructions: // Instruction { opcode: 512, args: [8, 8, 16777200, 1, 0, 0, 0] } // Instruction { opcode: 531, args: [4, 8, 12, 1, 2, 1, 0] } // Instruction { opcode: 576, args: [4, 0, 0, 1, 0, 0, 0] } // Instruction { opcode: 565, args: [4, 4, 1780, 1, 0, 1, 0] } let program = [ add(8, 8, 16777200, 0), storew(4, 8, 12, 2, 1, 0), auipc(4, 0, 0, 1, 0), jalr(4, 4, 1780, 1, 0), ]; assert_machine_output(program.to_vec(), "guest_top_block"); } #[test] fn memcpy_block() { // AND rd_ptr = 52, rs1_ptr = 44, rs2 = 3, rs2_as = 0 // SLTU rd_ptr = 52, rs1_ptr = 52, rs2 = 1, rs2_as = 0 // SLTU rd_ptr = 56, rs1_ptr = 56, rs2 = 1, rs2_as = 0 // OR rd_ptr = 52, rs1_ptr = 52, rs2 = 56, rs2_as = 1 // BNE 52 0 248 1 1 let program = [ 
        and(52, 44, 3, 0),
        sltu(52, 52, 1, 0),
        sltu(56, 56, 1, 0),
        or(52, 52, 56, 1),
        bne(52, 0, 248),
    ];
    assert_machine_output(program.to_vec(), "memcpy_block");
}

#[test]
fn stack_accesses() {
    // The memory optimizer should realize that [x2 + 24] is accessed twice,
    // with the same value of x2. Therefore, we can reduce it to just one access.
    let program = [
        // Load [x2 + 20] into x8
        loadw(8, 2, 20, 2, 1, 0),
        // Load [x2 + 24] into x9
        loadw(9, 2, 24, 2, 1, 0),
        // Store [x8] into [x2 + 24]
        storew(8, 2, 24, 2, 1, 0),
    ];
    assert_machine_output(program.to_vec(), "stack_accesses");
}

// Reth blocks, taken from:
// https://georgwiese.github.io/autoprecompile-analyzer/?data=https%3A%2F%2Fgithub.com%2Fpowdr-labs%2Fbench-results%2Fblob%2Fgh-pages%2Fresults%2F2025-09-25-0815%2Freth%2Fapc_candidates.json

#[test]
fn aligned_memcpy() {
    // Block 0x200a1c of the Reth benchmark.
    // => 1.1B trace cells, executed 986.1K times, effectiveness 3.48x.
    // C code:
    // https://github.com/kraj/musl/blob/d1c1058ee7a61cf86dc0292590e3f7eb09212d70/src/string/memcpy.c#L27-L30
    // RISC-V assembly:
    // https://github.com/openvm-org/openvm/blob/13362dc64fc2ec6f585018b408061bf56e7b7429/crates/toolchain/openvm/src/memcpy.s#L291-L302
    let program = [
        // Load 16 bytes from [x56 + 0..12] ...
        loadw(60, 56, 0, 2, 1, 0),
        loadw(64, 56, 4, 2, 1, 0),
        loadw(68, 56, 8, 2, 1, 0),
        loadw(20, 56, 12, 2, 1, 0),
        // ... and store them to [x52 + 0..12].
        storew(60, 52, 0, 2, 1, 0),
        storew(64, 52, 4, 2, 1, 0),
        storew(68, 52, 8, 2, 1, 0),
        storew(20, 52, 12, 2, 1, 0),
        // Advance pointers / decrement the remaining-length counter, then loop.
        add(56, 56, 16, 0),
        add(48, 48, 16777200, 0),
        add(52, 52, 16, 0),
        bltu(44, 48, -44),
    ];
    assert_machine_output(program.to_vec(), "aligned_memcpy");
}

#[test]
fn unaligned_memcpy() {
    // Block 0x200914 of the Reth benchmark.
    // => 484.1M trace cells, executed 442.9K times, effectiveness 4.61x.
    // C code:
    // https://github.com/kraj/musl/blob/d1c1058ee7a61cf86dc0292590e3f7eb09212d70/src/string/memcpy.c#L23
    // RISC-V assembly:
    // https://github.com/openvm-org/openvm/blob/13362dc64fc2ec6f585018b408061bf56e7b7429/crates/toolchain/openvm/src/memcpy.s#L220-L232
    // Circuit visualization:
    // https://docs.google.com/drawings/d/1JfLRuoWCyAsN3pht27W6UXUgtE_AiNx6r36lf-cAIfs/edit?usp=sharing
    let program = [
        loadb(68, 44, 0, 2, 1, 0),
        add(56, 44, 1, 0),
        add(52, 64, 1, 0),
        storeb(68, 64, 0, 2, 1, 0),
        add(48, 48, 16777215, 0),
        and(44, 60, 3, 0),
        sltu(44, 0, 44, 1),
        sltu(64, 0, 48, 1),
        and(68, 44, 64, 1),
        add(60, 60, 1, 0),
        add(44, 56, 0, 0),
        add(64, 52, 0, 0),
        bne(68, 0, -48),
    ];
    assert_machine_output(program.to_vec(), "unaligned_memcpy");
}

#[test]
fn load_two_bytes_compare() {
    // Block 0x3bc8fc of the Reth benchmark.
    // => 70.3M trace cells, executed 293k times, especially ineffective (1.85x reduction).
    let program = [
        loadb(52, 40, 0, 2, 1, 0),
        loadb(56, 44, 0, 2, 1, 0),
        bne(52, 56, 28),
    ];
    assert_machine_output(program.to_vec(), "load_two_bytes_compare");
}

#[test]
fn load_two_bytes_compare_unsigned() {
    // Similar to `load_two_bytes_compare`, but using `loadbu` instead of `loadb`.
    // Note that the two tests are largely equivalent; the sign extension of `loadb` does not
    // change the comparison result (though the contents of r52 and r56 will differ between the two).
    let program = [
        loadbu(52, 40, 0, 2, 1, 0),
        loadbu(56, 44, 0, 2, 1, 0),
        bne(52, 56, 28),
    ];
    assert_machine_output(program.to_vec(), "load_two_bytes_compare_unsigned");
}

#[test]
fn store_to_same_address() {
    // Store two different values to the same memory address.
    // The memory optimizer should realize the two memory addresses are the same,
    // and eliminate creating two separate memory columns.
    let program = [storeb(4, 8, 8, 2, 1, 0), storeb(32, 8, 8, 2, 1, 0)];
    assert_machine_output(program.to_vec(), "store_to_same_memory_address");
}

#[test]
fn many_stores_relative_to_same_register() {
    // Many stores to different offsets relative to the same base register.
    // For a real-world example of something similar, see:
    // https://georgwiese.github.io/autoprecompile-analyzer/?data=https%3A%2F%2Fgist.githubusercontent.com%2Fgeorgwiese%2Faa85dcc145f26d37f8f03f9a04665971%2Fraw%2F6ce661ec86302d2fef0282908117c0427d9888db%2Freth_with_labels.json&block=0x260648
    // Reproduces issue: Compute memory pointers in the field for intermediate pointers
    // https://github.com/powdr-labs/powdr/issues/3365
    let program = [
        storew(5, 2, 12, 2, 1, 0),
        storew(6, 2, 16, 2, 1, 0),
        storew(7, 2, 20, 2, 1, 0),
    ];
    assert_machine_output(program.to_vec(), "many_stores_relative_to_same_register");
}

#[test]
fn copy_byte() {
    // Copies a byte from one memory location to another, using loadb and storeb.
    // See this real-world example with a similar pattern:
    // https://georgwiese.github.io/autoprecompile-analyzer/?data=https%3A%2F%2Fgist.githubusercontent.com%2Fgeorgwiese%2Faa85dcc145f26d37f8f03f9a04665971%2Fraw%2F6ce661ec86302d2fef0282908117c0427d9888db%2Freth_with_labels.json&block=0x200914
    let program = [
        loadb(8, 2, 0, 2, 1, 0),
        storeb(8, 3, 0, 2, 1, 0),
        // Overwrite r8 with value 3.
        // Something similar happens in the block above: The sign extension of `loadb` is not actually needed.
add(8, 0, 3, 0), ]; assert_machine_output(program.to_vec(), "copy_byte"); } #[test] fn rotate() { // Rotation, implemented as `(x >> imm) | (x << (32-imm))` // for imm = 1 let program = [srl(1, 3, 1, 0), sll(2, 3, 31, 0), or(3, 1, 2, 1)]; assert_machine_output(program.to_vec(), "rotate"); } ================================================ FILE: openvm-riscv/tests/apc_builder_pseudo_instructions.rs ================================================ mod common; use openvm_instructions::instruction::Instruction; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::blocks::BasicBlock; use powdr_openvm_riscv::symbolic_instruction_builder::*; use test_log::test; fn assert_machine_output(program: Vec>, test_name: &str) { let bb = BasicBlock { start_pc: 0, instructions: program, }; common::apc_builder_utils::assert_machine_output(bb.into(), "pseudo_instructions", test_name); } // Arithmetic pseudo instructions #[test] fn mv() { // mv rd, rs1 expands to: addi rd, rs1, 0 let program = [ // [x8] = [x5] add(8, 5, 0, 0), ]; assert_machine_output(program.to_vec(), "mv"); } #[test] fn not() { // not rd, rs1 expands to: xori rd, rs1, -1 // -1 in 24-bit 2's complement is 0xFFFFFF let minus_one: u32 = 0xFFFFFF; let program = [ // [x8] = ~[x5] xor(8, 5, minus_one, 0), ]; assert_machine_output(program.to_vec(), "not"); } #[test] fn neg() { // neg rd, rs1 expands to: sub rd, x0, rs1 let program = [ // [x8] = -[x5] sub(8, 0, 5, 1), ]; assert_machine_output(program.to_vec(), "neg"); } // Set pseudo instructions #[test] fn seqz() { // seqz rd, rs1 expands to: sltiu rd, rs1, 1 // which in our case is: sltu rd, rs1, 1 (with rs2_as = 0 for immediate) // This sets rd = 1 if rs1 == 0, else rd = 0 let program = [ // [x8] = 1 if [x5] == 0, else 0 sltu(8, 5, 1, 0), ]; assert_machine_output(program.to_vec(), "seqz"); } #[test] fn snez() { // snez rd, rs1 expands to: sltu rd, x0, rs1 let program = [ // [x8] = 1 if [x5] != 0, else 0 sltu(8, 0, 5, 1), ]; 
assert_machine_output(program.to_vec(), "snez"); } #[test] fn sltz() { // sltz rd, rs1 expands to: slt rd, rs1, x0 let program = [ // [x8] = 1 if [x5] < 0 (signed), else 0 slt(8, 5, 0, 1), ]; assert_machine_output(program.to_vec(), "sltz"); } #[test] fn sgtz() { // sgtz rd, rs1 expands to: slt rd, x0, rs1 let program = [ // [x8] = 1 if [x5] > 0 (signed), else 0 slt(8, 0, 5, 1), ]; assert_machine_output(program.to_vec(), "sgtz"); } // Branch pseudo instructions #[test] fn beqz() { // beqz rs1, offset expands to: beq rs1, x0, offset let program = [ // pc = pc + 8 if [x5] == 0 beq(5, 0, 8), ]; assert_machine_output(program.to_vec(), "beqz"); } #[test] fn bnez() { // bnez rs1, offset expands to: bne rs1, x0, offset let program = [ // pc = pc + 8 if [x5] != 0 bne(5, 0, 8), ]; assert_machine_output(program.to_vec(), "bnez"); } #[test] fn blez() { // blez rs1, offset expands to: bge x0, rs1, offset let program = [ // pc = pc + 8 if [x5] <= 0 (signed) bge(0, 5, 8), ]; assert_machine_output(program.to_vec(), "blez"); } #[test] fn bgez() { // bgez rs1, offset expands to: bge rs1, x0, offset let program = [ // pc = pc + 8 if [x5] >= 0 (signed) bge(5, 0, 8), ]; assert_machine_output(program.to_vec(), "bgez"); } #[test] fn bltz() { // bltz rs1, offset expands to: blt rs1, x0, offset let program = [ // pc = pc + 8 if [x5] < 0 (signed) blt(5, 0, 8), ]; assert_machine_output(program.to_vec(), "bltz"); } #[test] fn bgtz() { // bgtz rs1, offset expands to: blt x0, rs1, offset let program = [ // pc = pc + 8 if [x5] > 0 (signed) blt(0, 5, 8), ]; assert_machine_output(program.to_vec(), "bgtz"); } // Jump pseudo instructions #[test] fn j() { // j offset expands to: jal x0, offset let program = [ // pc = pc + 8 jal(0, 0, 8, 1, 0), ]; assert_machine_output(program.to_vec(), "j"); } #[test] fn jr() { // jr offset expands to: jal x1, offset let program = [ // pc = pc + 8, [x1] = pc + 4 jal(1, 0, 8, 1, 0), ]; assert_machine_output(program.to_vec(), "jr"); } #[test] fn ret() { // ret expands 
to: jalr x0, x1, 0 let program = [ // pc = [x1] + 0 jalr(0, 1, 0, 1, 0), ]; assert_machine_output(program.to_vec(), "ret"); } #[test] fn load_immediate() { // [x48] = [x0] + 216 = 216 let program = [add(48, 0, 216, 0)]; assert_machine_output(program.to_vec(), "load_immediate"); } ================================================ FILE: openvm-riscv/tests/apc_builder_single_instructions.rs ================================================ mod common; use openvm_instructions::instruction::Instruction; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::blocks::BasicBlock; use powdr_openvm_riscv::symbolic_instruction_builder::*; use test_log::test; fn assert_machine_output(program: Vec>, test_name: &str) { let bb = BasicBlock { start_pc: 0, instructions: program, }; common::apc_builder_utils::assert_machine_output(bb.into(), "single_instructions", test_name); } // ALU Chip instructions #[test] fn single_add_1() { let program = [ // [x8] = [x8] + 1 add(8, 8, 1, 0), ]; assert_machine_output(program.to_vec(), "single_add_1"); } #[test] fn single_sub() { let program = [ // [x8] = [x7] - [x5] sub(8, 7, 5, 1), ]; assert_machine_output(program.to_vec(), "single_sub"); } #[test] fn single_and_0() { let program = [ // [x8] = [x0] & 5 and(8, 0, 5, 0), ]; assert_machine_output(program.to_vec(), "single_and_0"); } #[test] fn single_xor() { let program = [ // [x8] = [x7] ^ [x5] xor(8, 7, 5, 1), ]; assert_machine_output(program.to_vec(), "single_xor"); } #[test] fn single_mul() { let program = [ // [x8] = [x7] * [x5] mul(8, 7, 5, 1, 0), ]; assert_machine_output(program.to_vec(), "single_mul"); } // Load/Store Chip instructions // `needs_write` can be 0 iff `rd=0` for load, but must be 1 if store. 
#[test] fn single_loadw() { let program = [ // Load [x2 + 20]_2 into x8 loadw(8, 2, 20, 2, 1, 0), ]; assert_machine_output(program.to_vec(), "single_loadw"); } #[test] fn single_loadbu() { let program = [ // Load [x2 + 21]_2 into x8 loadbu(8, 2, 21, 2, 1, 0), ]; assert_machine_output(program.to_vec(), "single_loadbu"); } #[test] fn single_loadhu() { let program = [ // Load [x2 + 22]_2 but `needs_write=0` loadhu(0, 2, 22, 2, 0, 0), ]; assert_machine_output(program.to_vec(), "single_loadhu"); } #[test] fn single_storew() { let program = [ // Store [x8] into [x2 - 4]_2 storew(8, 2, 4, 2, 1, 1), ]; assert_machine_output(program.to_vec(), "single_storew"); } #[test] fn single_storeh() { let program = [ // Store [x8] into [x2 - 6]_2 storeh(8, 2, 6, 2, 1, 1), ]; assert_machine_output(program.to_vec(), "single_storeh"); } #[test] fn single_storeb() { let program = [ // Store [x8] into [x2 + 3]_2 storeb(8, 2, 3, 2, 1, 0), ]; assert_machine_output(program.to_vec(), "single_storeb"); } // Load/Store Sign Extend Chip instructions #[test] fn single_loadh() { let program = [ // Load [x2 + 6]_2 into x8 loadh(8, 2, 6, 2, 1, 0), ]; assert_machine_output(program.to_vec(), "single_loadh"); } #[test] fn single_loadb() { let program = [ // Load [x2 + 3]_2 into x8 loadb(8, 2, 3, 2, 1, 0), ]; assert_machine_output(program.to_vec(), "single_loadb"); } #[test] fn single_loadb_imm0() { let program = [ // Load [x2]_2 into x8 loadb(8, 2, 0, 2, 1, 0), ]; // The x2 + imm addition should be optimized away. assert_machine_output(program.to_vec(), "single_loadb_imm0"); } #[test] fn single_loadb_x0() { let program = [ // Load [x2 + 3]_2 into x0, i.e. `needs_write=0` loadb(0, 2, 3, 2, 0, 0), ]; // The instruction is a no-op, ideally, the APC would be empty. 
assert_machine_output(program.to_vec(), "single_loadb_x0"); } // Branch Eq Chip instructions #[test] fn single_beq() { let program = [ // pc = pc + 2 if x8 == x5 beq(8, 5, 2), ]; assert_machine_output(program.to_vec(), "single_beq"); } #[test] fn single_bne() { let program = [ // pc = pc + 2 if x8 != x5 bne(8, 5, 2), ]; assert_machine_output(program.to_vec(), "single_bne"); } // Branch Lt Chip instructions #[test] fn single_blt() { let program = [ // pc = pc + 2 if x8 < x5 (signed) blt(8, 5, 2), ]; assert_machine_output(program.to_vec(), "single_blt"); } #[test] fn single_bltu() { let program = [ // pc = pc + 2 if x8 < x5 bltu(8, 5, 2), ]; assert_machine_output(program.to_vec(), "single_bltu"); } #[test] fn single_bge() { let program = [ // pc = pc + 2 if x8 >= x5 (signed) bge(8, 5, 2), ]; assert_machine_output(program.to_vec(), "single_bge"); } #[test] fn single_bgeu() { let program = [ // pc = pc + 2 if x8 >= x5 bgeu(8, 5, 2), ]; assert_machine_output(program.to_vec(), "single_bgeu"); } // Shift Chip instructions #[test] fn single_srl() { // Instruction 416 from the largest basic block of the Keccak guest program. 
let program = [srl(68, 40, 25, 0)]; assert_machine_output(program.to_vec(), "single_srl"); } #[test] fn single_sll() { // r68 = r40 << 3 let program = [sll(68, 40, 3, 0)]; assert_machine_output(program.to_vec(), "single_sll"); } #[test] fn single_sll_by_8() { // r68 = r40 << 8 let program = [sll(68, 40, 8, 0)]; assert_machine_output(program.to_vec(), "single_sll_by_8"); } #[test] fn single_sra() { // r68 = sign_extend(r40 >> val(R3)) let program = [sra(68, 40, 3, 1)]; assert_machine_output(program.to_vec(), "single_sra"); } // DivRem Chip instructions #[test] fn single_div() { // [x8] = [x7] / [x5] (signed) let program = [div(8, 7, 5, 1, 0)]; assert_machine_output(program.to_vec(), "single_div"); } #[test] fn single_divu() { // [x8] = [x7] / [x5] (unsigned) let program = [divu(8, 7, 5, 1, 0)]; assert_machine_output(program.to_vec(), "single_divu"); } #[test] fn single_rem() { // [x8] = [x7] % [x5] (signed) let program = [rem(8, 7, 5, 1, 0)]; assert_machine_output(program.to_vec(), "single_rem"); } #[test] fn single_remu() { // [x8] = [x7] % [x5] (unsigned) let program = [remu(8, 7, 5, 1, 0)]; assert_machine_output(program.to_vec(), "single_remu"); } ================================================ FILE: openvm-riscv/tests/apc_builder_superblocks.rs ================================================ mod common; use openvm_instructions::instruction::Instruction; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::blocks::BasicBlock; use powdr_openvm_riscv::symbolic_instruction_builder::*; use test_log::test; fn assert_machine_output(program: Vec>>, test_name: &str) { common::apc_builder_utils::assert_machine_output(program.into(), "superblocks", test_name); } fn bb( start_pc: u64, instructions: Vec>, ) -> BasicBlock> { BasicBlock { start_pc, instructions, } } #[test] fn beq0_fallthrough() { // Superblock where the BEQ instruction falls through to the next instruction. // This should enforce that x8 != 0. 
let program = [ bb(0, vec![beq(8, 0, 40)]), // PC=4, fallthrough bb(4, vec![add(9, 9, 1, 0)]), ]; assert_machine_output(program.to_vec(), "beq0_fallthrough"); } #[test] fn beq0_jump() { // Superblock where the BEQ instruction jumps to the given address. // This should enforce that x8 == 0. let program = [ bb(0, vec![beq(8, 0, 40)]), // PC=40, jump taken bb(40, vec![add(9, 9, 1, 0)]), ]; assert_machine_output(program.to_vec(), "beq0_jump"); } #[test] fn beq_fallthrough() { // Superblock where the BEQ instruction falls through to the next instruction. // This should enforce that x8 != x10 (x10 holds 33). let program = [ bb(0, vec![add(10, 0, 33, 0), beq(8, 10, 40)]), // PC=8, fallthrough (BEQ at PC=4) bb(8, vec![add(9, 9, 1, 0)]), ]; assert_machine_output(program.to_vec(), "beq_fallthrough"); } #[test] fn beq_jump() { // Superblock where the BEQ instruction jumps to the given address. // This should enforce that x8 == x10 (x10 holds 33). let program = [ bb(0, vec![add(10, 0, 33, 0), beq(8, 10, 40)]), // PC=44, jump taken (BEQ at PC=4 with imm=40 jumps to PC=44) bb(44, vec![add(9, 9, 1, 0)]), ]; assert_machine_output(program.to_vec(), "beq_jump"); } #[test] fn many_blocks() { // Superblock with 3 basic blocks. 
// Constraints should propagate accross the jump instructions: // x10 = 10 // x11 = x10 // x12 = x11 + 5 = 15 // x8 = x12 = 15 let program = [ bb( 0, vec![ add(10, 0, 10, 0), // x10 = 10 bne(10, 11, 100), ], ), // PC=8, BNE fallthrough (x10 = x11) bb( 8, vec![ add(12, 11, 5, 0), // x12 = x11 + 5 = 15 (known after propagation) beq(8, 12, 60), // PC=12, BEQ jump to PC+60=72 ], ), // PC=72, BEQ jump (x8 = x12 = 15) bb(72, vec![add(9, 9, 1, 0)]), ]; assert_machine_output(program.to_vec(), "many_blocks"); } ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/aligned_memcpy.txt ================================================ Instructions: 0: LOADW rd_rs2_ptr = 60, rs1_ptr = 56, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 4: LOADW rd_rs2_ptr = 64, rs1_ptr = 56, imm = 4, mem_as = 2, needs_write = 1, imm_sign = 0 8: LOADW rd_rs2_ptr = 68, rs1_ptr = 56, imm = 8, mem_as = 2, needs_write = 1, imm_sign = 0 12: LOADW rd_rs2_ptr = 20, rs1_ptr = 56, imm = 12, mem_as = 2, needs_write = 1, imm_sign = 0 16: STOREW rd_rs2_ptr = 60, rs1_ptr = 52, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 20: STOREW rd_rs2_ptr = 64, rs1_ptr = 52, imm = 4, mem_as = 2, needs_write = 1, imm_sign = 0 24: STOREW rd_rs2_ptr = 68, rs1_ptr = 52, imm = 8, mem_as = 2, needs_write = 1, imm_sign = 0 28: STOREW rd_rs2_ptr = 20, rs1_ptr = 52, imm = 12, mem_as = 2, needs_write = 1, imm_sign = 0 32: ADD rd_ptr = 56, rs1_ptr = 56, rs2 = 16, rs2_as = 0 36: ADD rd_ptr = 48, rs1_ptr = 48, rs2 = 16777200, rs2_as = 0 40: ADD rd_ptr = 52, rs1_ptr = 52, rs2 = 16, rs2_as = 0 44: BLTU 44 48 -44 1 1 APC advantage: - Main columns: 468 -> 132 (3.55x reduction) - Bus interactions: 209 -> 89 (2.35x reduction) - Constraints: 291 -> 45 (6.47x reduction) Symbolic machine using 132 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 
read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 read_data__0_1 read_data__1_1 read_data__2_1 read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 read_data_aux__base__prev_timestamp_2 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_2 mem_ptr_limbs__0_2 mem_ptr_limbs__1_2 write_base_aux__prev_timestamp_2 write_base_aux__timestamp_lt_aux__lower_decomp__0_2 read_data__0_2 read_data__1_2 read_data__2_2 read_data__3_2 prev_data__0_2 prev_data__1_2 prev_data__2_2 prev_data__3_2 read_data_aux__base__prev_timestamp_3 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_3 mem_ptr_limbs__0_3 mem_ptr_limbs__1_3 write_base_aux__prev_timestamp_3 write_base_aux__timestamp_lt_aux__lower_decomp__0_3 read_data__0_3 read_data__1_3 read_data__2_3 read_data__3_3 prev_data__0_3 prev_data__1_3 prev_data__2_3 prev_data__3_3 rs1_data__0_4 rs1_data__1_4 rs1_data__2_4 rs1_data__3_4 rs1_aux_cols__base__prev_timestamp_4 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_4 mem_ptr_limbs__0_4 mem_ptr_limbs__1_4 write_base_aux__prev_timestamp_4 write_base_aux__timestamp_lt_aux__lower_decomp__0_4 prev_data__0_4 prev_data__1_4 prev_data__2_4 prev_data__3_4 mem_ptr_limbs__0_5 mem_ptr_limbs__1_5 write_base_aux__prev_timestamp_5 write_base_aux__timestamp_lt_aux__lower_decomp__0_5 prev_data__0_5 prev_data__1_5 prev_data__2_5 prev_data__3_5 mem_ptr_limbs__0_6 mem_ptr_limbs__1_6 write_base_aux__prev_timestamp_6 write_base_aux__timestamp_lt_aux__lower_decomp__0_6 
prev_data__0_6 prev_data__1_6 prev_data__2_6 prev_data__3_6 mem_ptr_limbs__0_7 mem_ptr_limbs__1_7 write_base_aux__prev_timestamp_7 write_base_aux__timestamp_lt_aux__lower_decomp__0_7 prev_data__0_7 prev_data__1_7 prev_data__2_7 prev_data__3_7 a__0_8 a__1_8 a__2_8 a__3_8 reads_aux__0__base__prev_timestamp_9 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_9 writes_aux__prev_data__0_9 writes_aux__prev_data__1_9 writes_aux__prev_data__2_9 writes_aux__prev_data__3_9 a__0_9 a__1_9 a__2_9 a__3_9 a__0_10 a__1_10 a__2_10 a__3_10 reads_aux__0__base__prev_timestamp_11 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_11 a__0_11 a__1_11 a__2_11 a__3_11 cmp_result_11 diff_marker__0_11 diff_marker__1_11 diff_marker__2_11 diff_marker__3_11 diff_val_11 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[48 - 48 * cmp_result_11, from_state__timestamp_0 + 35] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 56, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 60, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[1, 64, prev_data__0_1, prev_data__1_1, prev_data__2_1, 
prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_2 + 65536 * mem_ptr_limbs__1_2, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, read_data_aux__base__prev_timestamp_2] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_2 + 65536 * mem_ptr_limbs__1_2, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, from_state__timestamp_0 + 7] mult=is_valid * -1, args=[1, 68, prev_data__0_2, prev_data__1_2, prev_data__2_2, prev_data__3_2, write_base_aux__prev_timestamp_2] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_3 + 65536 * mem_ptr_limbs__1_3, read_data__0_3, read_data__1_3, read_data__2_3, read_data__3_3, read_data_aux__base__prev_timestamp_3] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_3 + 65536 * mem_ptr_limbs__1_3, read_data__0_3, read_data__1_3, read_data__2_3, read_data__3_3, from_state__timestamp_0 + 10] mult=is_valid * -1, args=[1, 20, prev_data__0_3, prev_data__1_3, prev_data__2_3, prev_data__3_3, write_base_aux__prev_timestamp_3] mult=is_valid * -1, args=[1, 52, rs1_data__0_4, rs1_data__1_4, rs1_data__2_4, rs1_data__3_4, rs1_aux_cols__base__prev_timestamp_4] mult=is_valid * 1, args=[1, 60, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 13] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_4 + 65536 * mem_ptr_limbs__1_4, prev_data__0_4, prev_data__1_4, prev_data__2_4, prev_data__3_4, write_base_aux__prev_timestamp_4] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_4 + 65536 * mem_ptr_limbs__1_4, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 14] mult=is_valid * 1, args=[1, 64, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 16] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_5 + 65536 * mem_ptr_limbs__1_5, prev_data__0_5, prev_data__1_5, prev_data__2_5, prev_data__3_5, write_base_aux__prev_timestamp_5] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_5 + 65536 * mem_ptr_limbs__1_5, 
read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 17] mult=is_valid * 1, args=[1, 68, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, from_state__timestamp_0 + 19] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_6 + 65536 * mem_ptr_limbs__1_6, prev_data__0_6, prev_data__1_6, prev_data__2_6, prev_data__3_6, write_base_aux__prev_timestamp_6] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_6 + 65536 * mem_ptr_limbs__1_6, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, from_state__timestamp_0 + 20] mult=is_valid * 1, args=[1, 20, read_data__0_3, read_data__1_3, read_data__2_3, read_data__3_3, from_state__timestamp_0 + 22] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_7 + 65536 * mem_ptr_limbs__1_7, prev_data__0_7, prev_data__1_7, prev_data__2_7, prev_data__3_7, write_base_aux__prev_timestamp_7] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_7 + 65536 * mem_ptr_limbs__1_7, read_data__0_3, read_data__1_3, read_data__2_3, read_data__3_3, from_state__timestamp_0 + 23] mult=is_valid * 1, args=[1, 56, a__0_8, a__1_8, a__2_8, a__3_8, from_state__timestamp_0 + 26] mult=is_valid * -1, args=[1, 48, writes_aux__prev_data__0_9, writes_aux__prev_data__1_9, writes_aux__prev_data__2_9, writes_aux__prev_data__3_9, reads_aux__0__base__prev_timestamp_9] mult=is_valid * 1, args=[1, 52, a__0_10, a__1_10, a__2_10, a__3_10, from_state__timestamp_0 + 32] mult=is_valid * -1, args=[1, 44, a__0_11, a__1_11, a__2_11, a__3_11, reads_aux__0__base__prev_timestamp_11] mult=is_valid * 1, args=[1, 44, a__0_11, a__1_11, a__2_11, a__3_11, from_state__timestamp_0 + 33] mult=is_valid * 1, args=[1, 48, a__0_9, a__1_9, a__2_9, a__3_9, from_state__timestamp_0 + 34] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * 
from_state__timestamp_0, 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_2), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_2, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_2 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 92160), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_2 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 107520), 12] mult=is_valid * 1, args=[-(503316480 * 
mem_ptr_limbs__0_3), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_3, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_3, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_3 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_3 - (15360 * from_state__timestamp_0 + 138240), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_3, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_3 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_3 - (15360 * from_state__timestamp_0 + 153600), 12] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_4, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_4 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_4 - (15360 * from_state__timestamp_0 + 168960), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_4), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_4, 13] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_4, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_4 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_4 - (15360 * from_state__timestamp_0 + 199680), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_5), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_5, 13] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_5, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_5 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_5 - (15360 * from_state__timestamp_0 + 245760), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_6), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_6, 13] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_6, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_6 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_6 - (15360 * from_state__timestamp_0 + 291840), 12] 
mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_7), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_7, 13] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_7, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_7 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_7 - (15360 * from_state__timestamp_0 + 337920), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_9, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_9 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_9 - (15360 * from_state__timestamp_0 + 399360), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_11, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_11 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_11 - (15360 * from_state__timestamp_0 + 491520), 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_11 + diff_marker__1_11 + diff_marker__2_11 + diff_marker__3_11, args=[diff_val_11 - 1, 0, 0, 0] mult=is_valid * 1, args=[a__0_8, a__1_8, 0, 0] mult=is_valid * 1, args=[a__2_8, a__3_8, 0, 0] mult=is_valid * 1, args=[a__0_9, a__1_9, 0, 0] mult=is_valid * 1, args=[a__2_9, a__3_9, 0, 0] mult=is_valid * 1, args=[a__0_10, a__1_10, 0, 0] mult=is_valid * 1, args=[a__2_10, a__3_10, 0, 0] // Algebraic constraints: (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 1)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 1)) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 122880 * is_valid)) * (30720 * 
mem_ptr_limbs__0_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 122881)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_1 + 251658242 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_1 + 251658243)) = 0 (30720 * mem_ptr_limbs__0_2 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 245760 * is_valid)) * (30720 * mem_ptr_limbs__0_2 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 245761)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_2 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_2 + 503316484 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_2 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_2 + 503316485)) = 0 (30720 * mem_ptr_limbs__0_3 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 368640 * is_valid)) * (30720 * mem_ptr_limbs__0_3 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 368641)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_3 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_3 + 754974726 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_3 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_3 + 754974727)) = 0 (30720 * mem_ptr_limbs__0_4 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4)) * (30720 * mem_ptr_limbs__0_4 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 1)) = 0 (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_4 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_4)) * (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_4 - (120 * 
rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_4 + 1)) = 0 (30720 * mem_ptr_limbs__0_5 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 122880 * is_valid)) * (30720 * mem_ptr_limbs__0_5 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 122881)) = 0 (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_5 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_5 + 251658242 * is_valid)) * (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_5 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_5 + 251658243)) = 0 (30720 * mem_ptr_limbs__0_6 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 245760 * is_valid)) * (30720 * mem_ptr_limbs__0_6 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 245761)) = 0 (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_6 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_6 + 503316484 * is_valid)) * (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_6 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_6 + 503316485)) = 0 (30720 * mem_ptr_limbs__0_7 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 368640 * is_valid)) * (30720 * mem_ptr_limbs__0_7 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 368641)) = 0 (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_7 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_7 + 754974726 * is_valid)) * (943718400 * rs1_data__0_4 + 30720 * mem_ptr_limbs__1_7 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * mem_ptr_limbs__0_7 + 754974727)) = 0 (7864320 * a__0_8 - (7864320 * rs1_data__0_0 + 125829120 * is_valid)) * (7864320 * a__0_8 - (7864320 * rs1_data__0_0 + 125829121)) = 0 (30720 * a__0_8 + 7864320 * a__1_8 - (30720 * rs1_data__0_0 
+ 7864320 * rs1_data__1_0 + 491520 * is_valid)) * (30720 * a__0_8 + 7864320 * a__1_8 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 491521)) = 0 (120 * a__0_8 + 30720 * a__1_8 + 7864320 * a__2_8 - (120 * rs1_data__0_0 + 30720 * rs1_data__1_0 + 7864320 * rs1_data__2_0 + 1920 * is_valid)) * (120 * a__0_8 + 30720 * a__1_8 + 7864320 * a__2_8 - (120 * rs1_data__0_0 + 30720 * rs1_data__1_0 + 7864320 * rs1_data__2_0 + 1921)) = 0 (943718400 * rs1_data__0_0 + 120 * a__1_8 + 30720 * a__2_8 + 7864320 * a__3_8 + 1006632953 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * a__0_8)) * (943718400 * rs1_data__0_0 + 120 * a__1_8 + 30720 * a__2_8 + 7864320 * a__3_8 + 1006632952 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * a__0_8)) = 0 (7864320 * a__0_9 + 125829121 * is_valid - 7864320 * writes_aux__prev_data__0_9) * (7864320 * a__0_9 + 125829120 - 7864320 * writes_aux__prev_data__0_9) = 0 (30720 * a__0_9 + 7864320 * a__1_9 + 491521 * is_valid - (30720 * writes_aux__prev_data__0_9 + 7864320 * writes_aux__prev_data__1_9)) * (30720 * a__0_9 + 7864320 * a__1_9 + 491520 - (30720 * writes_aux__prev_data__0_9 + 7864320 * writes_aux__prev_data__1_9)) = 0 (120 * a__0_9 + 30720 * a__1_9 + 7864320 * a__2_9 + 1921 * is_valid - (120 * writes_aux__prev_data__0_9 + 30720 * writes_aux__prev_data__1_9 + 7864320 * writes_aux__prev_data__2_9)) * (120 * a__0_9 + 30720 * a__1_9 + 7864320 * a__2_9 + 1920 - (120 * writes_aux__prev_data__0_9 + 30720 * writes_aux__prev_data__1_9 + 7864320 * writes_aux__prev_data__2_9)) = 0 (943718400 * writes_aux__prev_data__0_9 + 120 * a__1_9 + 30720 * a__2_9 + 7864320 * a__3_9 - (120 * writes_aux__prev_data__1_9 + 30720 * writes_aux__prev_data__2_9 + 7864320 * writes_aux__prev_data__3_9 + 943718400 * a__0_9 + 1006632952 * is_valid)) * (943718400 * writes_aux__prev_data__0_9 + 120 * a__1_9 + 30720 * a__2_9 + 7864320 * a__3_9 - (120 * writes_aux__prev_data__1_9 + 30720 * 
writes_aux__prev_data__2_9 + 7864320 * writes_aux__prev_data__3_9 + 943718400 * a__0_9 + 1006632953)) = 0 (7864320 * a__0_10 - (7864320 * rs1_data__0_4 + 125829120 * is_valid)) * (7864320 * a__0_10 - (7864320 * rs1_data__0_4 + 125829121)) = 0 (30720 * a__0_10 + 7864320 * a__1_10 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 491520 * is_valid)) * (30720 * a__0_10 + 7864320 * a__1_10 - (30720 * rs1_data__0_4 + 7864320 * rs1_data__1_4 + 491521)) = 0 (120 * a__0_10 + 30720 * a__1_10 + 7864320 * a__2_10 - (120 * rs1_data__0_4 + 30720 * rs1_data__1_4 + 7864320 * rs1_data__2_4 + 1920 * is_valid)) * (120 * a__0_10 + 30720 * a__1_10 + 7864320 * a__2_10 - (120 * rs1_data__0_4 + 30720 * rs1_data__1_4 + 7864320 * rs1_data__2_4 + 1921)) = 0 (943718400 * rs1_data__0_4 + 120 * a__1_10 + 30720 * a__2_10 + 7864320 * a__3_10 + 1006632953 * is_valid - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * a__0_10)) * (943718400 * rs1_data__0_4 + 120 * a__1_10 + 30720 * a__2_10 + 7864320 * a__3_10 + 1006632952 - (120 * rs1_data__1_4 + 30720 * rs1_data__2_4 + 7864320 * rs1_data__3_4 + 943718400 * a__0_10)) = 0 cmp_result_11 * (cmp_result_11 - 1) = 0 diff_marker__3_11 * (diff_marker__3_11 - 1) = 0 (1 - diff_marker__3_11) * ((a__3_9 - a__3_11) * (2 * cmp_result_11 - 1)) = 0 diff_marker__3_11 * ((a__3_11 - a__3_9) * (2 * cmp_result_11 - 1) + diff_val_11) = 0 diff_marker__2_11 * (diff_marker__2_11 - 1) = 0 (1 - (diff_marker__2_11 + diff_marker__3_11)) * ((a__2_9 - a__2_11) * (2 * cmp_result_11 - 1)) = 0 diff_marker__2_11 * ((a__2_11 - a__2_9) * (2 * cmp_result_11 - 1) + diff_val_11) = 0 diff_marker__1_11 * (diff_marker__1_11 - 1) = 0 (1 - (diff_marker__1_11 + diff_marker__2_11 + diff_marker__3_11)) * ((a__1_9 - a__1_11) * (2 * cmp_result_11 - 1)) = 0 diff_marker__1_11 * ((a__1_11 - a__1_9) * (2 * cmp_result_11 - 1) + diff_val_11) = 0 diff_marker__0_11 * (diff_marker__0_11 - 1) = 0 (1 - (diff_marker__0_11 + diff_marker__1_11 + diff_marker__2_11 + 
diff_marker__3_11)) * ((a__0_9 - a__0_11) * (2 * cmp_result_11 - 1)) = 0 diff_marker__0_11 * ((a__0_11 - a__0_9) * (2 * cmp_result_11 - 1) + diff_val_11) = 0 (diff_marker__0_11 + diff_marker__1_11 + diff_marker__2_11 + diff_marker__3_11) * (diff_marker__0_11 + diff_marker__1_11 + diff_marker__2_11 + diff_marker__3_11 - 1) = 0 (1 - (diff_marker__0_11 + diff_marker__1_11 + diff_marker__2_11 + diff_marker__3_11)) * cmp_result_11 = 0 (1 - is_valid) * (diff_marker__0_11 + diff_marker__1_11 + diff_marker__2_11 + diff_marker__3_11) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/copy_byte.txt ================================================ Instructions: 0: LOADB rd_rs2_ptr = 8, rs1_ptr = 2, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 4: STOREB rd_rs2_ptr = 8, rs1_ptr = 3, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 8: ADD rd_ptr = 8, rs1_ptr = 0, rs2 = 3, rs2_as = 0 APC advantage: - Main columns: 113 -> 50 (2.26x reduction) - Bus interactions: 55 -> 31 (1.77x reduction) - Constraints: 65 -> 21 (3.10x reduction) Symbolic machine using 50 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 opcode_loadb_flag0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 shifted_read_data__2_0 shifted_read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 rs1_data__0_1 rs1_data__1_1 rs1_data__2_1 rs1_data__3_1 rs1_aux_cols__base__prev_timestamp_1 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 
write_base_aux__timestamp_lt_aux__lower_decomp__0_1 flags__0_1 flags__1_1 flags__2_1 flags__3_1 read_data__0_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 write_data__0_1 write_data__1_1 write_data__2_1 write_data__3_1 reads_aux__0__base__prev_timestamp_2 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[12, from_state__timestamp_0 + 9] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, 
write_base_aux__prev_timestamp_0] mult=is_valid * -1, args=[1, 3, rs1_data__0_1, rs1_data__1_1, rs1_data__2_1, rs1_data__3_1, rs1_aux_cols__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 3, rs1_data__0_1, rs1_data__1_1, rs1_data__2_1, rs1_data__3_1, from_state__timestamp_0 + 3] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1 - (flags__1_1 * flags__2_1 + 2 * flags__0_1 * flags__2_1 + 2 * flags__1_1 * flags__3_1 + 3 * flags__2_1 * flags__3_1), prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1 - (flags__1_1 * flags__2_1 + 2 * flags__0_1 * flags__2_1 + 2 * flags__1_1 * flags__3_1 + 3 * flags__2_1 * flags__3_1), write_data__0_1, write_data__1_1, write_data__2_1, write_data__3_1, from_state__timestamp_0 + 5] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_2] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 6] mult=is_valid * 1, args=[1, 8, 3, 0, 0, 0, from_state__timestamp_0 + 8] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__0_0 * opcode_loadb_flag0_0 + shifted_read_data__1_0 * (1 - opcode_loadb_flag0_0) - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 + 503316480 - (503316480 * mem_ptr_limbs__0_0 + 503316480 * opcode_loadb_flag0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * 
from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_1 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 30720), 12] mult=is_valid * 1, args=[503316480 * flags__2_1 * (flags__2_1 - 1) + 503316481 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 503316480 * flags__1_1 * flags__2_1 + 1006632960 * flags__0_1 * flags__2_1 + 1006632960 * flags__1_1 * flags__3_1 - (503316480 * flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 1006632960 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 503316481 * flags__2_1 * flags__3_1 + 503316480 * mem_ptr_limbs__0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_2 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 76800), 12] // Algebraic constraints: opcode_loadb_flag0_0 * (opcode_loadb_flag0_0 - 1) = 0 data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 
1)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 1)) = 0 flags__0_1 * ((flags__0_1 - 1) * (flags__0_1 - 2)) = 0 flags__1_1 * ((flags__1_1 - 1) * (flags__1_1 - 2)) = 0 flags__2_1 * ((flags__2_1 - 1) * (flags__2_1 - 2)) = 0 flags__3_1 * ((flags__3_1 - 1) * (flags__3_1 - 2)) = 0 (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 1 * is_valid) * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1) + 1006632960 * flags__2_1 * (flags__2_1 - 1) + 1006632960 * flags__3_1 * (flags__3_1 - 1) + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) = 0 (1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1) + 1006632960 * flags__3_1 * (flags__3_1 - 1)) * read_data__0_1 + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * (255 * data_most_sig_bit_0) + (1006632960 * flags__2_1 * (flags__2_1 - 1) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2)) * (255 * data_most_sig_bit_0) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * (255 * data_most_sig_bit_0) + (flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) - (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1)) * read_data__0_1 + write_data__0_1 - (flags__0_1 * flags__2_1 + flags__1_1 * flags__2_1 + flags__1_1 * flags__3_1 + flags__2_1 * flags__3_1) * prev_data__0_1 = 0 (1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1)) * (255 * 
data_most_sig_bit_0) + 1006632960 * flags__2_1 * (flags__2_1 - 1) * (255 * data_most_sig_bit_0) + (flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) - flags__0_1 * flags__1_1) * (255 * data_most_sig_bit_0) + write_data__1_1 - (flags__1_1 * flags__2_1 * read_data__0_1 + (flags__0_1 * flags__2_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__3_1 + flags__2_1 * flags__3_1) * prev_data__1_1) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) * (255 * data_most_sig_bit_0) + flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * (255 * data_most_sig_bit_0) + write_data__2_1 - ((flags__0_1 * flags__2_1 + flags__1_1 * flags__3_1) * read_data__0_1 + (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__2_1 + flags__2_1 * flags__3_1) * prev_data__2_1) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) * (255 * data_most_sig_bit_0) + flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * (255 * data_most_sig_bit_0) + write_data__3_1 - (flags__2_1 * flags__3_1 * read_data__0_1 + flags__0_1 * flags__2_1 * (255 * data_most_sig_bit_0) + (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__2_1 + flags__1_1 * flags__3_1) * prev_data__3_1) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_1 + 7864320 * rs1_data__1_1)) * (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_1 + 7864320 * rs1_data__1_1 + 1)) = 0 (943718400 * rs1_data__0_1 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_1 + 30720 * rs1_data__2_1 + 7864320 * rs1_data__3_1 + 943718400 * mem_ptr_limbs__0_1)) * (943718400 * rs1_data__0_1 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_1 + 30720 * rs1_data__2_1 + 7864320 * rs1_data__3_1 + 943718400 * mem_ptr_limbs__0_1 + 1)) = 0 flags__1_1 * (flags__1_1 - 1) + flags__2_1 * (flags__2_1 - 1) + 4 * flags__0_1 * flags__1_1 + 4 * flags__0_1 * flags__2_1 + 5 * flags__0_1 * flags__3_1 + 5 * flags__1_1 * flags__2_1 + 5 * flags__1_1 * flags__3_1 + 5 * flags__2_1 * flags__3_1 - (1006632960 * 
flags__3_1 * (flags__3_1 - 1) + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 5 * is_valid) = 0 flags__2_1 * (flags__2_1 - 1) - (flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 2 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2)) = 0 opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * shifted_read_data__1_0 - read_data__0_1 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/guest_top_block.txt ================================================ Instructions: 0: ADD rd_ptr = 8, rs1_ptr = 8, rs2 = 16777200, rs2_as = 0 4: STOREW rd_rs2_ptr = 4, rs1_ptr = 8, imm = 12, mem_as = 2, needs_write = 1, imm_sign = 0 8: AUIPC 4 0 0 1 0 12: JALR 4 4 1780 1 0 APC advantage: - Main columns: 125 -> 26 (4.81x reduction) - Bus interactions: 65 -> 18 (3.61x reduction) - Constraints: 61 -> 7 (8.71x reduction) Symbolic machine using 26 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 read_data__0_1 read_data__1_1 read_data__2_1 read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] 
mult=is_valid * 1, args=[1788, from_state__timestamp_0 + 9] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 3] mult=is_valid * -1, args=[1, 4, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 5] mult=is_valid * 1, args=[1, 4, 8, 0, 0, 0, from_state__timestamp_0 + 7] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, a__3_0, 0, 0] // Algebraic 
constraints: (7864320 * a__0_0 + 125829121 * is_valid - 7864320 * writes_aux__prev_data__0_0) * (7864320 * a__0_0 + 125829120 - 7864320 * writes_aux__prev_data__0_0) = 0 (30720 * a__0_0 + 7864320 * a__1_0 + 491521 * is_valid - (30720 * writes_aux__prev_data__0_0 + 7864320 * writes_aux__prev_data__1_0)) * (30720 * a__0_0 + 7864320 * a__1_0 + 491520 - (30720 * writes_aux__prev_data__0_0 + 7864320 * writes_aux__prev_data__1_0)) = 0 (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 + 1921 * is_valid - (120 * writes_aux__prev_data__0_0 + 30720 * writes_aux__prev_data__1_0 + 7864320 * writes_aux__prev_data__2_0)) * (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 + 1920 - (120 * writes_aux__prev_data__0_0 + 30720 * writes_aux__prev_data__1_0 + 7864320 * writes_aux__prev_data__2_0)) = 0 (943718400 * writes_aux__prev_data__0_0 + 120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 - (120 * writes_aux__prev_data__1_0 + 30720 * writes_aux__prev_data__2_0 + 7864320 * writes_aux__prev_data__3_0 + 943718400 * a__0_0 + 1006632952 * is_valid)) * (943718400 * writes_aux__prev_data__0_0 + 120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 - (120 * writes_aux__prev_data__1_0 + 30720 * writes_aux__prev_data__2_0 + 7864320 * writes_aux__prev_data__3_0 + 943718400 * a__0_0 + 1006632953)) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * a__0_0 + 7864320 * a__1_0 + 368640 * is_valid)) * (30720 * mem_ptr_limbs__0_1 - (30720 * a__0_0 + 7864320 * a__1_0 + 368641)) = 0 (943718400 * a__0_0 + 30720 * mem_ptr_limbs__1_1 - (120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 943718400 * mem_ptr_limbs__0_1 + 754974726 * is_valid)) * (943718400 * a__0_0 + 30720 * mem_ptr_limbs__1_1 - (120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 943718400 * mem_ptr_limbs__0_1 + 754974727)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/load_two_bytes_compare.txt ================================================ Instructions: 0: LOADB 
rd_rs2_ptr = 52, rs1_ptr = 40, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 4: LOADB rd_rs2_ptr = 56, rs1_ptr = 44, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 8: BNE 52 56 28 1 1 APC advantage: - Main columns: 98 -> 51 (1.92x reduction) - Bus interactions: 47 -> 32 (1.47x reduction) - Constraints: 47 -> 15 (3.13x reduction) Symbolic machine using 51 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 opcode_loadb_flag0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 shifted_read_data__2_0 shifted_read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 rs1_data__0_1 rs1_data__1_1 rs1_data__2_1 rs1_data__3_1 rs1_aux_cols__base__prev_timestamp_1 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 opcode_loadb_flag0_1 shift_most_sig_bit_1 data_most_sig_bit_1 shifted_read_data__0_1 shifted_read_data__1_1 shifted_read_data__2_1 shifted_read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 cmp_result_2 diff_inv_marker__0_2 free_var_101 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[24 * cmp_result_2 + 12, from_state__timestamp_0 + 8] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 40, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 40, rs1_data__0_0, 
rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 52, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * -1, args=[1, 44, rs1_data__0_1, rs1_data__1_1, rs1_data__2_1, rs1_data__3_1, rs1_aux_cols__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 44, rs1_data__0_1, rs1_data__1_1, rs1_data__2_1, rs1_data__3_1, from_state__timestamp_0 + 3] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1 + opcode_loadb_flag0_1 - (2 * shift_most_sig_bit_1 + 1), shift_most_sig_bit_1 * shifted_read_data__2_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__0_1, shift_most_sig_bit_1 * shifted_read_data__3_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__1_1, shift_most_sig_bit_1 * shifted_read_data__0_1 + (1 - 
shift_most_sig_bit_1) * shifted_read_data__2_1, shift_most_sig_bit_1 * shifted_read_data__1_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1 + opcode_loadb_flag0_1 - (2 * shift_most_sig_bit_1 + 1), shift_most_sig_bit_1 * shifted_read_data__2_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__0_1, shift_most_sig_bit_1 * shifted_read_data__3_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__1_1, shift_most_sig_bit_1 * shifted_read_data__0_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__2_1, shift_most_sig_bit_1 * shifted_read_data__1_1 + (1 - shift_most_sig_bit_1) * shifted_read_data__3_1, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[1, 56, prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[1, 52, opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * shifted_read_data__1_0, 255 * data_most_sig_bit_0, 255 * data_most_sig_bit_0, 255 * data_most_sig_bit_0, from_state__timestamp_0 + 6] mult=is_valid * 1, args=[1, 56, opcode_loadb_flag0_1 * shifted_read_data__0_1 + (1 - opcode_loadb_flag0_1) * shifted_read_data__1_1, 255 * data_most_sig_bit_1, 255 * data_most_sig_bit_1, 255 * data_most_sig_bit_1, from_state__timestamp_0 + 7] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__0_0 * opcode_loadb_flag0_0 + shifted_read_data__1_0 * (1 - opcode_loadb_flag0_0) - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 + 503316480 - (503316480 * mem_ptr_limbs__0_0 + 503316480 * opcode_loadb_flag0_0), 14] mult=is_valid * 1, 
args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[shifted_read_data__0_1 * opcode_loadb_flag0_1 + shifted_read_data__1_1 * (1 - opcode_loadb_flag0_1) - 128 * data_most_sig_bit_1, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_1 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 30720), 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_1 + 503316480 - (503316480 * mem_ptr_limbs__0_1 + 503316480 * opcode_loadb_flag0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] // Algebraic constraints: opcode_loadb_flag0_0 * (opcode_loadb_flag0_0 - 1) = 0 data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * 
mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 1)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 1)) = 0 opcode_loadb_flag0_1 * (opcode_loadb_flag0_1 - 1) = 0 data_most_sig_bit_1 * (data_most_sig_bit_1 - 1) = 0 shift_most_sig_bit_1 * (shift_most_sig_bit_1 - 1) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_1 + 7864320 * rs1_data__1_1)) * (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_1 + 7864320 * rs1_data__1_1 + 1)) = 0 (943718400 * rs1_data__0_1 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_1 + 30720 * rs1_data__2_1 + 7864320 * rs1_data__3_1 + 943718400 * mem_ptr_limbs__0_1)) * (943718400 * rs1_data__0_1 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_1 + 30720 * rs1_data__2_1 + 7864320 * rs1_data__3_1 + 943718400 * mem_ptr_limbs__0_1 + 1)) = 0 cmp_result_2 * (cmp_result_2 - 1) = 0 (1 - cmp_result_2) * ((opcode_loadb_flag0_1 - 1) * shifted_read_data__1_1 + opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * shifted_read_data__1_0 - opcode_loadb_flag0_1 * shifted_read_data__0_1) = 0 (1 - cmp_result_2) * (255 * data_most_sig_bit_0 - 255 * data_most_sig_bit_1) = 0 ((opcode_loadb_flag0_1 - 1) * shifted_read_data__1_1 + opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * shifted_read_data__1_0 - opcode_loadb_flag0_1 * shifted_read_data__0_1) * diff_inv_marker__0_2 + free_var_101 * ((255 * data_most_sig_bit_0 - 255 * data_most_sig_bit_1) * (255 * data_most_sig_bit_0 - 255 * data_most_sig_bit_1) + (255 * data_most_sig_bit_0 - 255 * data_most_sig_bit_1) * (255 * data_most_sig_bit_0 - 255 * data_most_sig_bit_1) + (255 * data_most_sig_bit_0 - 255 * data_most_sig_bit_1) * (255 * data_most_sig_bit_0 - 
255 * data_most_sig_bit_1)) - cmp_result_2 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/load_two_bytes_compare_unsigned.txt ================================================ Instructions: 0: LOADBU rd_rs2_ptr = 52, rs1_ptr = 40, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 4: LOADBU rd_rs2_ptr = 56, rs1_ptr = 44, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 8: BNE 52 56 28 1 1 APC advantage: - Main columns: 108 -> 54 (2.00x reduction) - Bus interactions: 45 -> 30 (1.50x reduction) - Constraints: 61 -> 32 (1.91x reduction) Symbolic machine using 54 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 flags__0_0 flags__1_0 flags__2_0 flags__3_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 write_data__0_0 rs1_data__0_1 rs1_data__1_1 rs1_data__2_1 rs1_data__3_1 rs1_aux_cols__base__prev_timestamp_1 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 flags__0_1 flags__1_1 flags__2_1 flags__3_1 read_data__0_1 read_data__1_1 read_data__2_1 read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 write_data__0_1 cmp_result_2 diff_inv_marker__0_2 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[24 * cmp_result_2 + 12, from_state__timestamp_0 + 8] // Bus 
1 (MEMORY): mult=is_valid * -1, args=[1, 40, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 40, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 2 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - flags__2_0 * (flags__2_0 - 1), read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 2 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - flags__2_0 * (flags__2_0 - 1), read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 52, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * -1, args=[1, 44, rs1_data__0_1, rs1_data__1_1, rs1_data__2_1, rs1_data__3_1, rs1_aux_cols__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 44, rs1_data__0_1, rs1_data__1_1, rs1_data__2_1, rs1_data__3_1, from_state__timestamp_0 + 3] mult=is_valid * -1, args=[2, flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 2 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1 - flags__2_1 * (flags__2_1 - 1), read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * 1, args=[2, flags__0_1 * (flags__0_1 + flags__1_1 
+ flags__2_1 + flags__3_1 - 2) + 2 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1 - flags__2_1 * (flags__2_1 - 1), read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[1, 56, prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[1, 52, write_data__0_0, 0, 0, 0, from_state__timestamp_0 + 6] mult=is_valid * 1, args=[1, 56, write_data__0_1, 0, 0, 0, from_state__timestamp_0 + 7] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[503316480 * flags__2_0 * (flags__2_0 - 1) + 503316481 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316480 * flags__1_0 * flags__2_0 + 1006632960 * flags__0_0 * flags__2_0 + 1006632960 * flags__1_0 * flags__3_0 - (503316480 * flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1006632960 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316481 * flags__2_0 * flags__3_0 + 503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 
- (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_1 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 30720), 12] mult=is_valid * 1, args=[503316480 * flags__2_1 * (flags__2_1 - 1) + 503316481 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 503316480 * flags__1_1 * flags__2_1 + 1006632960 * flags__0_1 * flags__2_1 + 1006632960 * flags__1_1 * flags__3_1 - (503316480 * flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 1006632960 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 503316481 * flags__2_1 * flags__3_1 + 503316480 * mem_ptr_limbs__0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] // Algebraic constraints: flags__0_0 * ((flags__0_0 - 1) * (flags__0_0 - 2)) = 0 flags__1_0 * ((flags__1_0 - 1) * (flags__1_0 - 2)) = 0 flags__2_0 * ((flags__2_0 - 1) * (flags__2_0 - 2)) = 0 flags__3_0 * ((flags__3_0 - 1) * (flags__3_0 - 2)) = 0 (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 1 * is_valid) * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__2_0 * (flags__2_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + 
flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1 * is_valid = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1)) * read_data__0_0 + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__1_0 + (1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2)) * read_data__2_0 + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0)) * read_data__0_0 + write_data__0_0 - (flags__0_0 * flags__2_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__0_0 = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1)) * read_data__1_0 + 1006632960 * flags__2_0 * (flags__2_0 - 1) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - flags__0_0 * flags__1_0) * read_data__1_0 - (flags__1_0 * flags__2_0 * read_data__0_0 + (flags__0_0 * flags__2_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__1_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__2_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__2_0 - ((flags__0_0 * flags__2_0 + flags__1_0 * flags__3_0) * read_data__0_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__2_0 * flags__3_0) * prev_data__2_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__3_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 - (flags__2_0 * flags__3_0 * read_data__0_0 + flags__0_0 * flags__2_0 * read_data__1_0 + 
(flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0) * prev_data__3_0) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 1)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 1)) = 0 flags__1_0 * (flags__1_0 - 1) + flags__2_0 * (flags__2_0 - 1) + 4 * flags__0_0 * flags__1_0 + 4 * flags__0_0 * flags__2_0 + 5 * flags__0_0 * flags__3_0 + 5 * flags__1_0 * flags__2_0 + 5 * flags__1_0 * flags__3_0 + 5 * flags__2_0 * flags__3_0 - (1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1 * is_valid) = 0 flags__0_1 * ((flags__0_1 - 1) * (flags__0_1 - 2)) = 0 flags__1_1 * ((flags__1_1 - 1) * (flags__1_1 - 2)) = 0 flags__2_1 * ((flags__2_1 - 1) * (flags__2_1 - 2)) = 0 flags__3_1 * ((flags__3_1 - 1) * (flags__3_1 - 2)) = 0 (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 1 * is_valid) * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1) + 1006632960 * flags__2_1 * (flags__2_1 - 1) + 1006632960 * flags__3_1 * (flags__3_1 - 1) + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 1 * is_valid = 0 
(1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1) + 1006632960 * flags__3_1 * (flags__3_1 - 1)) * read_data__0_1 + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__1_1 + (1006632960 * flags__2_1 * (flags__2_1 - 1) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2)) * read_data__2_1 + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__3_1 + (flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) - (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1)) * read_data__0_1 + write_data__0_1 - (flags__0_1 * flags__2_1 + flags__1_1 * flags__2_1 + flags__1_1 * flags__3_1 + flags__2_1 * flags__3_1) * prev_data__0_1 = 0 (1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1)) * read_data__1_1 + 1006632960 * flags__2_1 * (flags__2_1 - 1) * read_data__3_1 + (flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) - flags__0_1 * flags__1_1) * read_data__1_1 - (flags__1_1 * flags__2_1 * read_data__0_1 + (flags__0_1 * flags__2_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__3_1 + flags__2_1 * flags__3_1) * prev_data__1_1) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) * read_data__2_1 + flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__2_1 - ((flags__0_1 * flags__2_1 + flags__1_1 * flags__3_1) * read_data__0_1 + (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__2_1 + flags__2_1 * flags__3_1) * prev_data__2_1) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) * read_data__3_1 + flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__3_1 - (flags__2_1 * flags__3_1 * read_data__0_1 + flags__0_1 * flags__2_1 * read_data__1_1 + (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__2_1 + flags__1_1 * flags__3_1) * prev_data__3_1) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_1 + 7864320 * 
rs1_data__1_1)) * (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_1 + 7864320 * rs1_data__1_1 + 1)) = 0 (943718400 * rs1_data__0_1 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_1 + 30720 * rs1_data__2_1 + 7864320 * rs1_data__3_1 + 943718400 * mem_ptr_limbs__0_1)) * (943718400 * rs1_data__0_1 + 30720 * mem_ptr_limbs__1_1 - (120 * rs1_data__1_1 + 30720 * rs1_data__2_1 + 7864320 * rs1_data__3_1 + 943718400 * mem_ptr_limbs__0_1 + 1)) = 0 flags__1_1 * (flags__1_1 - 1) + flags__2_1 * (flags__2_1 - 1) + 4 * flags__0_1 * flags__1_1 + 4 * flags__0_1 * flags__2_1 + 5 * flags__0_1 * flags__3_1 + 5 * flags__1_1 * flags__2_1 + 5 * flags__1_1 * flags__3_1 + 5 * flags__2_1 * flags__3_1 - (1006632960 * flags__3_1 * (flags__3_1 - 1) + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 1 * is_valid) = 0 cmp_result_2 * (cmp_result_2 - 1) = 0 (1 - cmp_result_2) * (write_data__0_0 - write_data__0_1) = 0 (write_data__0_0 - write_data__0_1) * diff_inv_marker__0_2 - cmp_result_2 = 0 flags__1_0 * flags__2_0 + 2 * flags__0_0 * flags__2_0 + 2 * flags__1_0 * flags__3_0 + 3 * flags__2_0 * flags__3_0 = 0 flags__1_1 * flags__2_1 + 2 * flags__0_1 * flags__2_1 + 2 * flags__1_1 * flags__3_1 + 3 * flags__2_1 * flags__3_1 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/many_stores_relative_to_same_register.txt ================================================ Instructions: 0: STOREW rd_rs2_ptr = 5, rs1_ptr = 2, imm = 12, mem_as = 2, needs_write = 1, imm_sign = 0 4: STOREW rd_rs2_ptr = 6, rs1_ptr = 2, imm = 16, mem_as = 2, needs_write = 1, imm_sign = 0 8: STOREW rd_rs2_ptr = 7, rs1_ptr = 2, imm = 20, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 123 -> 50 
(2.46x reduction) - Bus interactions: 51 -> 36 (1.42x reduction) - Constraints: 75 -> 7 (10.71x reduction) Symbolic machine using 50 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 read_data__0_1 read_data__1_1 read_data__2_1 read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 read_data_aux__base__prev_timestamp_2 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_2 mem_ptr_limbs__0_2 mem_ptr_limbs__1_2 write_base_aux__prev_timestamp_2 write_base_aux__timestamp_lt_aux__lower_decomp__0_2 read_data__0_2 read_data__1_2 read_data__2_2 read_data__3_2 prev_data__0_2 prev_data__1_2 prev_data__2_2 prev_data__3_2 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[12, from_state__timestamp_0 + 9] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 5, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, prev_data__0_0, 
prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 2] mult=is_valid * -1, args=[1, 6, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 6, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 5] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0 + 6] mult=is_valid * -1, args=[1, 7, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, read_data_aux__base__prev_timestamp_2] mult=is_valid * 1, args=[1, 7, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, from_state__timestamp_0 + 7] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_2 + 65536 * mem_ptr_limbs__1_2, prev_data__0_2, prev_data__1_2, prev_data__2_2, prev_data__3_2, write_base_aux__prev_timestamp_2] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_2 + 65536 * mem_ptr_limbs__1_2, read_data__0_2, read_data__1_2, read_data__2_2, read_data__3_2, from_state__timestamp_0 + 8] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] 
mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_2), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_2, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_2 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 92160), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_2 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 107520), 12] // Algebraic constraints: (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 368640 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 
7864320 * rs1_data__1_0 + 368641)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 754974726 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 754974727)) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 491520 * is_valid)) * (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 491521)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_1 + 1006632953 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_1)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_1 + 1006632952 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_1)) = 0 (30720 * mem_ptr_limbs__0_2 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 614400 * is_valid)) * (30720 * mem_ptr_limbs__0_2 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 614401)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_2 + 754974711 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_2)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_2 + 754974710 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_2)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/memcpy_block.txt ================================================ Instructions: 0: AND rd_ptr = 52, rs1_ptr = 44, rs2 = 3, rs2_as = 0 4: SLTU rd_ptr = 52, rs1_ptr = 52, rs2 = 1, rs2_as = 0 8: SLTU rd_ptr = 56, rs1_ptr = 56, rs2 = 1, rs2_as = 0 12: OR rd_ptr = 52, rs1_ptr = 52, rs2 = 56, rs2_as = 1 16: BNE 52 0 248 1 1 APC 
advantage: - Main columns: 172 -> 29 (5.93x reduction) - Bus interactions: 87 -> 19 (4.58x reduction) - Constraints: 111 -> 10 (11.10x reduction) Symbolic machine using 29 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_1 reads_aux__0__base__prev_timestamp_2 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 writes_aux__prev_data__0_2 writes_aux__prev_data__1_2 writes_aux__prev_data__2_2 writes_aux__prev_data__3_2 cmp_result_2 reads_aux__1__base__prev_timestamp_4 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_4 cmp_result_4 inv_of_sum_173 inv_of_sum_174 free_var_176 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[244 * cmp_result_4 + 20, from_state__timestamp_0 + 14] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 44, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 44, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 52, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 56, writes_aux__prev_data__0_2, writes_aux__prev_data__1_2, writes_aux__prev_data__2_2, writes_aux__prev_data__3_2, reads_aux__0__base__prev_timestamp_2] mult=is_valid * 1, args=[1, 56, cmp_result_2, 0, 0, 0, from_state__timestamp_0 + 10] mult=is_valid * 1, args=[1, 52, cmp_result_1 + cmp_result_2 - cmp_result_1 * cmp_result_2, 0, 0, 0, from_state__timestamp_0 + 12] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_4] mult=is_valid * 1, args=[1, 0, 0, 0, 
0, 0, from_state__timestamp_0 + 13] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_2 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 76800), 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_4, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_4 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_4 - (15360 * from_state__timestamp_0 + 184320), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[b__0_0, 3, b__0_0 + 3 - 2 * a__0_0, 1] // Algebraic constraints: cmp_result_1 * (cmp_result_1 - 1) = 0 cmp_result_2 * (cmp_result_2 - 1) = 0 cmp_result_4 * (cmp_result_4 - 1) = 0 cmp_result_1 * a__0_0 = 0 inv_of_sum_173 * a__0_0 + cmp_result_1 - 1 * is_valid = 0 cmp_result_2 * (writes_aux__prev_data__0_2 + writes_aux__prev_data__1_2 + writes_aux__prev_data__2_2 + writes_aux__prev_data__3_2) = 0 inv_of_sum_174 * (writes_aux__prev_data__0_2 + writes_aux__prev_data__1_2 + writes_aux__prev_data__2_2 + writes_aux__prev_data__3_2) + cmp_result_2 - 1 * is_valid = 0 (1 - cmp_result_4) * (cmp_result_1 + cmp_result_2 - cmp_result_1 * cmp_result_2) = 0 free_var_176 * (cmp_result_1 + cmp_result_2 - cmp_result_1 * cmp_result_2) - cmp_result_4 = 0 is_valid * (is_valid - 1) = 0 
================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/rotate.txt ================================================ Instructions: 0: SRL rd_ptr = 1, rs1_ptr = 3, rs2 = 1, rs2_as = 0 4: SLL rd_ptr = 2, rs1_ptr = 3, rs2 = 31, rs2_as = 0 8: OR rd_ptr = 3, rs1_ptr = 1, rs2 = 2, rs2_as = 1 APC advantage: - Main columns: 142 -> 26 (5.46x reduction) - Bus interactions: 68 -> 18 (3.78x reduction) - Constraints: 174 -> 5 (34.80x reduction) Symbolic machine using 26 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 writes_aux__base__prev_timestamp_1 writes_aux__base__timestamp_lt_aux__lower_decomp__0_1 writes_aux__prev_data__0_1 writes_aux__prev_data__1_1 writes_aux__prev_data__2_1 writes_aux__prev_data__3_1 a__3_1 a__3_2 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[12, from_state__timestamp_0 + 9] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 3, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 1, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 2, writes_aux__prev_data__0_1, writes_aux__prev_data__1_1, writes_aux__prev_data__2_1, writes_aux__prev_data__3_1, writes_aux__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 1, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 6] mult=is_valid * 1, args=[1, 2, 0, 0, 0, a__3_1, from_state__timestamp_0 + 7] mult=is_valid * 1, args=[1, 3, a__0_0, a__1_0, a__2_0, a__3_2, from_state__timestamp_0 + 8] // 
Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[7864320 * a__3_1 - 1006632960 * b__0_0, 7] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_1 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__3_0, a__3_1, 2 * a__3_2 - (a__3_0 + a__3_1), 1] mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, 0, 0, 0] // Algebraic constraints: (b__0_0 + 256 * b__1_0 + 65536 * b__2_0 + 16777216 * b__3_0 - (2 * a__0_0 + 512 * a__1_0 + 131072 * a__2_0 + 33554432 * a__3_0)) * (b__0_0 + 256 * b__1_0 + 65536 * b__2_0 + 16777216 * b__3_0 - (2 * a__0_0 + 512 * a__1_0 + 131072 * a__2_0 + 33554432 * a__3_0 + 1)) = 0 (b__1_0 + 256 * b__2_0 + 65536 * b__3_0 - (2 * a__1_0 + 512 * a__2_0 + 131072 * a__3_0)) * (b__1_0 + 256 * b__2_0 + 65536 * b__3_0 - (2 * a__1_0 + 512 * a__2_0 + 131072 * a__3_0 + 1)) = 0 (b__2_0 + 256 * b__3_0 - (2 * a__2_0 + 512 * a__3_0)) * (b__2_0 + 256 * b__3_0 - (2 * a__2_0 + 512 * a__3_0 + 1)) = 0 (b__3_0 - 2 * a__3_0) * (b__3_0 - (2 * a__3_0 + 1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/stack_accesses.txt ================================================ Instructions: 0: LOADW rd_rs2_ptr = 8, rs1_ptr = 2, imm = 
20, mem_as = 2, needs_write = 1, imm_sign = 0 4: LOADW rd_rs2_ptr = 9, rs1_ptr = 2, imm = 24, mem_as = 2, needs_write = 1, imm_sign = 0 8: STOREW rd_rs2_ptr = 8, rs1_ptr = 2, imm = 24, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 123 -> 36 (3.42x reduction) - Bus interactions: 51 -> 26 (1.96x reduction) - Constraints: 75 -> 5 (15.00x reduction) Symbolic machine using 36 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 mem_ptr_limbs__0_1 mem_ptr_limbs__1_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 read_data__0_1 read_data__1_1 read_data__2_1 read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[12, from_state__timestamp_0 + 9] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, 
prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * -1, args=[1, 9, prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[1, 9, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 5] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0 + 6] mult=is_valid * 1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 7] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_1 + 65536 * mem_ptr_limbs__1_1, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 8] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_1), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_1, 13] mult=is_valid * 1, 
args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] // Algebraic constraints: (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 614400 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 614401)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 754974711 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 754974710 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) = 0 (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 737280 * is_valid)) * (30720 * mem_ptr_limbs__0_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 737281)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_1 + 503316469 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_1)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_1 + 503316468 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/store_to_same_memory_address.txt ================================================ Instructions: 0: STOREB rd_rs2_ptr = 4, rs1_ptr = 8, imm = 8, mem_as = 2, needs_write = 1, imm_sign = 0 4: STOREB 
rd_rs2_ptr = 32, rs1_ptr = 8, imm = 8, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 82 -> 50 (1.64x reduction) - Bus interactions: 34 -> 25 (1.36x reduction) - Constraints: 50 -> 27 (1.85x reduction) Symbolic machine using 50 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 flags__0_0 flags__1_0 flags__2_0 flags__3_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 write_data__0_0 write_data__1_0 write_data__2_0 write_data__3_0 read_data_aux__base__prev_timestamp_1 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 write_base_aux__prev_timestamp_1 write_base_aux__timestamp_lt_aux__lower_decomp__0_1 flags__0_1 flags__1_1 flags__2_1 flags__3_1 read_data__0_1 read_data__1_1 read_data__2_1 read_data__3_1 prev_data__0_1 prev_data__1_1 prev_data__2_1 prev_data__3_1 write_data__0_1 write_data__1_1 write_data__2_1 write_data__3_1 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[8, from_state__timestamp_0 + 6] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 4, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 4, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_0 * flags__2_0 + 2 * flags__0_0 * flags__2_0 + 2 * 
flags__1_0 * flags__3_0 + 3 * flags__2_0 * flags__3_0), prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_0 * flags__2_0 + 2 * flags__0_0 * flags__2_0 + 2 * flags__1_0 * flags__3_0 + 3 * flags__2_0 * flags__3_0), write_data__0_0, write_data__1_0, write_data__2_0, write_data__3_0, from_state__timestamp_0 + 2] mult=is_valid * 1, args=[1, 8, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0 + 3] mult=is_valid * -1, args=[1, 32, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, read_data_aux__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 32, read_data__0_1, read_data__1_1, read_data__2_1, read_data__3_1, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_1 * flags__2_1 + 2 * flags__0_1 * flags__2_1 + 2 * flags__1_1 * flags__3_1 + 3 * flags__2_1 * flags__3_1), prev_data__0_1, prev_data__1_1, prev_data__2_1, prev_data__3_1, write_base_aux__prev_timestamp_1] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_1 * flags__2_1 + 2 * flags__0_1 * flags__2_1 + 2 * flags__1_1 * flags__3_1 + 3 * flags__2_1 * flags__3_1), write_data__0_1, write_data__1_1, write_data__2_1, write_data__3_1, from_state__timestamp_0 + 5] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[503316480 * flags__2_0 * (flags__2_0 - 1) + 503316481 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316480 * flags__1_0 * flags__2_0 + 1006632960 * flags__0_0 * flags__2_0 + 1006632960 * flags__1_0 * flags__3_0 - (503316480 * 
flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1006632960 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316481 * flags__2_0 * flags__3_0 + 503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[503316480 * flags__2_1 * (flags__2_1 - 1) + 503316481 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 503316480 * flags__1_1 * flags__2_1 + 1006632960 * flags__0_1 * flags__2_1 + 1006632960 * flags__1_1 * flags__3_1 - (503316480 * flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 1006632960 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 503316481 * flags__2_1 * flags__3_1 + 503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_1 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_1 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] // Algebraic constraints: flags__0_0 * ((flags__0_0 - 1) * (flags__0_0 - 2)) = 0 flags__1_0 * ((flags__1_0 - 1) * (flags__1_0 - 2)) = 0 flags__2_0 * 
((flags__2_0 - 1) * (flags__2_0 - 2)) = 0 flags__3_0 * ((flags__3_0 - 1) * (flags__3_0 - 2)) = 0 (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 1 * is_valid) * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__2_0 * (flags__2_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1)) * read_data__0_0 + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__1_0 + (1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2)) * read_data__2_0 + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0)) * read_data__0_0 + write_data__0_0 - (flags__0_0 * flags__2_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__0_0 = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1)) * read_data__1_0 + 1006632960 * flags__2_0 * (flags__2_0 - 1) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - flags__0_0 * flags__1_0) * read_data__1_0 + write_data__1_0 - (flags__1_0 * flags__2_0 * read_data__0_0 + (flags__0_0 * flags__2_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__1_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__2_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__2_0 + 
write_data__2_0 - ((flags__0_0 * flags__2_0 + flags__1_0 * flags__3_0) * read_data__0_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__2_0 * flags__3_0) * prev_data__2_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__3_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 + write_data__3_0 - (flags__2_0 * flags__3_0 * read_data__0_0 + flags__0_0 * flags__2_0 * read_data__1_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0) * prev_data__3_0) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 245760 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 245761)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 503316484 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 503316485)) = 0 flags__1_0 * (flags__1_0 - 1) + flags__2_0 * (flags__2_0 - 1) + 4 * flags__0_0 * flags__1_0 + 4 * flags__0_0 * flags__2_0 + 5 * flags__0_0 * flags__3_0 + 5 * flags__1_0 * flags__2_0 + 5 * flags__1_0 * flags__3_0 + 5 * flags__2_0 * flags__3_0 - (1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 5 * is_valid) = 0 flags__0_1 * ((flags__0_1 - 1) * (flags__0_1 - 2)) = 0 flags__1_1 * ((flags__1_1 - 1) * (flags__1_1 - 2)) = 0 flags__2_1 * ((flags__2_1 - 1) * (flags__2_1 - 2)) = 0 flags__3_1 * ((flags__3_1 - 1) * (flags__3_1 - 2)) = 0 (flags__0_1 + 
flags__1_1 + flags__2_1 + flags__3_1 - 1 * is_valid) * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1) + 1006632960 * flags__2_1 * (flags__2_1 - 1) + 1006632960 * flags__3_1 * (flags__3_1 - 1) + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) = 0 (1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1) + 1006632960 * flags__3_1 * (flags__3_1 - 1)) * read_data__0_1 + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__1_1 + (1006632960 * flags__2_1 * (flags__2_1 - 1) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2)) * read_data__2_1 + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__3_1 + (flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) - (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1)) * read_data__0_1 + write_data__0_1 - (flags__0_1 * flags__2_1 + flags__1_1 * flags__2_1 + flags__1_1 * flags__3_1 + flags__2_1 * flags__3_1) * prev_data__0_1 = 0 (1006632960 * flags__0_1 * (flags__0_1 - 1) + 1006632960 * flags__1_1 * (flags__1_1 - 1)) * read_data__1_1 + 1006632960 * flags__2_1 * (flags__2_1 - 1) * read_data__3_1 + (flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) - flags__0_1 * flags__1_1) * read_data__1_1 + write_data__1_1 - (flags__1_1 * flags__2_1 * read_data__0_1 + (flags__0_1 * flags__2_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__3_1 + flags__2_1 * flags__3_1) * prev_data__1_1) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) * read_data__2_1 + flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__2_1 + write_data__2_1 - ((flags__0_1 * flags__2_1 + flags__1_1 * flags__3_1) * read_data__0_1 + (flags__0_1 * 
flags__1_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__2_1 + flags__2_1 * flags__3_1) * prev_data__2_1) = 0 1006632960 * flags__0_1 * (flags__0_1 - 1) * read_data__3_1 + flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) * read_data__3_1 + write_data__3_1 - (flags__2_1 * flags__3_1 * read_data__0_1 + flags__0_1 * flags__2_1 * read_data__1_1 + (flags__0_1 * flags__1_1 + flags__0_1 * flags__3_1 + flags__1_1 * flags__2_1 + flags__1_1 * flags__3_1) * prev_data__3_1) = 0 flags__1_1 * (flags__1_1 - 1) + flags__2_1 * (flags__2_1 - 1) + 4 * flags__0_1 * flags__1_1 + 4 * flags__0_1 * flags__2_1 + 5 * flags__0_1 * flags__3_1 + 5 * flags__1_1 * flags__2_1 + 5 * flags__1_1 * flags__3_1 + 5 * flags__2_1 * flags__3_1 - (1006632960 * flags__3_1 * (flags__3_1 - 1) + flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__3_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 5 * is_valid) = 0 flags__2_0 * (flags__2_0 - 1) - (flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 2 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2)) = 0 flags__2_1 * (flags__2_1 - 1) - (flags__0_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 2 * flags__1_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2) + 3 * flags__2_1 * (flags__0_1 + flags__1_1 + flags__2_1 + flags__3_1 - 2)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/complex/unaligned_memcpy.txt ================================================ Instructions: 0: LOADB rd_rs2_ptr = 68, rs1_ptr = 44, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 4: ADD rd_ptr = 56, rs1_ptr = 44, rs2 = 1, rs2_as = 0 8: ADD rd_ptr = 52, rs1_ptr = 64, rs2 = 
1, rs2_as = 0 12: STOREB rd_rs2_ptr = 68, rs1_ptr = 64, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 16: ADD rd_ptr = 48, rs1_ptr = 48, rs2 = 16777215, rs2_as = 0 20: AND rd_ptr = 44, rs1_ptr = 60, rs2 = 3, rs2_as = 0 24: SLTU rd_ptr = 44, rs1_ptr = 0, rs2 = 44, rs2_as = 1 28: SLTU rd_ptr = 64, rs1_ptr = 0, rs2 = 48, rs2_as = 1 32: AND rd_ptr = 68, rs1_ptr = 44, rs2 = 64, rs2_as = 1 36: ADD rd_ptr = 60, rs1_ptr = 60, rs2 = 1, rs2_as = 0 40: ADD rd_ptr = 44, rs1_ptr = 56, rs2 = 0, rs2_as = 0 44: ADD rd_ptr = 64, rs1_ptr = 52, rs2 = 0, rs2_as = 0 48: BNE 68 0 -48 1 1 APC advantage: - Main columns: 465 -> 105 (4.43x reduction) - Bus interactions: 242 -> 58 (4.17x reduction) - Constraints: 286 -> 67 (4.27x reduction) Symbolic machine using 105 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 opcode_loadb_flag0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 shifted_read_data__2_0 shifted_read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 writes_aux__base__prev_timestamp_1 writes_aux__base__timestamp_lt_aux__lower_decomp__0_1 writes_aux__prev_data__0_1 writes_aux__prev_data__1_1 writes_aux__prev_data__2_1 writes_aux__prev_data__3_1 a__0_1 a__1_1 a__2_1 a__3_1 reads_aux__0__base__prev_timestamp_2 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 writes_aux__base__prev_timestamp_2 writes_aux__base__timestamp_lt_aux__lower_decomp__0_2 writes_aux__prev_data__0_2 writes_aux__prev_data__1_2 writes_aux__prev_data__2_2 writes_aux__prev_data__3_2 a__0_2 a__1_2 a__2_2 a__3_2 b__0_2 b__1_2 b__2_2 b__3_2 mem_ptr_limbs__0_3 mem_ptr_limbs__1_3 
write_base_aux__prev_timestamp_3 write_base_aux__timestamp_lt_aux__lower_decomp__0_3 flags__0_3 flags__1_3 flags__2_3 flags__3_3 read_data__0_3 prev_data__0_3 prev_data__1_3 prev_data__2_3 prev_data__3_3 write_data__0_3 write_data__1_3 write_data__2_3 write_data__3_3 reads_aux__0__base__prev_timestamp_4 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_4 writes_aux__prev_data__0_4 writes_aux__prev_data__1_4 writes_aux__prev_data__2_4 writes_aux__prev_data__3_4 a__0_4 a__1_4 a__2_4 a__3_4 reads_aux__0__base__prev_timestamp_5 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_5 a__0_5 b__0_5 b__1_5 b__2_5 b__3_5 reads_aux__0__base__prev_timestamp_6 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_6 cmp_result_6 diff_marker__0_6 diff_marker__1_6 diff_marker__2_6 diff_marker__3_6 diff_val_6 cmp_result_7 diff_marker__0_7 diff_marker__1_7 diff_marker__2_7 diff_marker__3_7 diff_val_7 a__0_9 a__1_9 a__2_9 a__3_9 cmp_result_12 free_var_467 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[52 - 52 * cmp_result_12, from_state__timestamp_0 + 38] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 44, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), 
shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 68, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * -1, args=[1, 56, writes_aux__prev_data__0_1, writes_aux__prev_data__1_1, writes_aux__prev_data__2_1, writes_aux__prev_data__3_1, writes_aux__base__prev_timestamp_1] mult=is_valid * -1, args=[1, 64, b__0_2, b__1_2, b__2_2, b__3_2, reads_aux__0__base__prev_timestamp_2] mult=is_valid * -1, args=[1, 52, writes_aux__prev_data__0_2, writes_aux__prev_data__1_2, writes_aux__prev_data__2_2, writes_aux__prev_data__3_2, writes_aux__base__prev_timestamp_2] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_3 + 65536 * mem_ptr_limbs__1_3 - (flags__1_3 * flags__2_3 + 2 * flags__0_3 * flags__2_3 + 2 * flags__1_3 * flags__3_3 + 3 * flags__2_3 * flags__3_3), prev_data__0_3, prev_data__1_3, prev_data__2_3, prev_data__3_3, write_base_aux__prev_timestamp_3] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_3 + 65536 * mem_ptr_limbs__1_3 - (flags__1_3 * flags__2_3 + 2 * flags__0_3 * flags__2_3 + 2 * flags__1_3 * flags__3_3 + 3 * flags__2_3 * flags__3_3), write_data__0_3, write_data__1_3, write_data__2_3, write_data__3_3, from_state__timestamp_0 + 11] mult=is_valid * -1, args=[1, 48, writes_aux__prev_data__0_4, writes_aux__prev_data__1_4, writes_aux__prev_data__2_4, writes_aux__prev_data__3_4, reads_aux__0__base__prev_timestamp_4] mult=is_valid * -1, args=[1, 60, b__0_5, b__1_5, b__2_5, b__3_5, reads_aux__0__base__prev_timestamp_5] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_6] mult=is_valid * 
1, args=[1, 48, a__0_4, a__1_4, a__2_4, a__3_4, from_state__timestamp_0 + 22] mult=is_valid * 1, args=[1, 60, a__0_9, a__1_9, a__2_9, a__3_9, from_state__timestamp_0 + 29] mult=is_valid * 1, args=[1, 56, a__0_1, a__1_1, a__2_1, a__3_1, from_state__timestamp_0 + 30] mult=is_valid * 1, args=[1, 44, a__0_1, a__1_1, a__2_1, a__3_1, from_state__timestamp_0 + 32] mult=is_valid * 1, args=[1, 52, a__0_2, a__1_2, a__2_2, a__3_2, from_state__timestamp_0 + 33] mult=is_valid * 1, args=[1, 64, a__0_2, a__1_2, a__2_2, a__3_2, from_state__timestamp_0 + 35] mult=is_valid * 1, args=[1, 68, cmp_result_6 * cmp_result_7, 0, 0, 0, from_state__timestamp_0 + 36] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 37] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__0_0 * opcode_loadb_flag0_0 + shifted_read_data__1_0 * (1 - opcode_loadb_flag0_0) - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 + 503316480 - (503316480 * mem_ptr_limbs__0_0 + 503316480 * opcode_loadb_flag0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 
1, args=[15360 * writes_aux__base__prev_timestamp_1 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 61440), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_2 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 76800), 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_2 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 107520), 12] mult=is_valid * 1, args=[503316480 * flags__2_3 * (flags__2_3 - 1) + 503316481 * flags__2_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 503316480 * flags__1_3 * flags__2_3 + 1006632960 * flags__0_3 * flags__2_3 + 1006632960 * flags__1_3 * flags__3_3 - (503316480 * flags__0_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 1006632960 * flags__1_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 503316481 * flags__2_3 * flags__3_3 + 503316480 * mem_ptr_limbs__0_3), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_3, 13] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_3, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_3 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_3 - (15360 * from_state__timestamp_0 + 153600), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_4, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_4 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_4 - (15360 * from_state__timestamp_0 + 168960), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_5, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_5 + 15360 * 
reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_5 - (15360 * from_state__timestamp_0 + 215040), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_6, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_6 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_6 - (15360 * from_state__timestamp_0 + 261120), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[b__0_5, 3, b__0_5 + 3 - 2 * a__0_5, 1] mult=diff_marker__0_6 + diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6, args=[diff_val_6 - 1, 0, 0, 0] mult=diff_marker__0_7 + diff_marker__1_7 + diff_marker__2_7 + diff_marker__3_7, args=[diff_val_7 - 1, 0, 0, 0] mult=is_valid * 1, args=[a__0_1, a__1_1, 0, 0] mult=is_valid * 1, args=[a__2_1, a__3_1, 0, 0] mult=is_valid * 1, args=[a__0_2, a__1_2, 0, 0] mult=is_valid * 1, args=[a__2_2, a__3_2, 0, 0] mult=is_valid * 1, args=[a__0_4, a__1_4, 0, 0] mult=is_valid * 1, args=[a__2_4, a__3_4, 0, 0] mult=is_valid * 1, args=[a__0_9, a__1_9, 0, 0] mult=is_valid * 1, args=[a__2_9, a__3_9, 0, 0] // Algebraic constraints: opcode_loadb_flag0_0 * (opcode_loadb_flag0_0 - 1) = 0 data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 1)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 1)) = 0 (7864320 * a__0_1 - (7864320 * rs1_data__0_0 + 7864320 * is_valid)) * (7864320 * a__0_1 - (7864320 * rs1_data__0_0 + 7864321)) = 0 (30720 * a__0_1 + 7864320 * a__1_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 30720 * 
is_valid)) * (30720 * a__0_1 + 7864320 * a__1_1 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 30721)) = 0 (120 * a__0_1 + 30720 * a__1_1 + 7864320 * a__2_1 - (120 * rs1_data__0_0 + 30720 * rs1_data__1_0 + 7864320 * rs1_data__2_0 + 120 * is_valid)) * (120 * a__0_1 + 30720 * a__1_1 + 7864320 * a__2_1 - (120 * rs1_data__0_0 + 30720 * rs1_data__1_0 + 7864320 * rs1_data__2_0 + 121)) = 0 (943718400 * rs1_data__0_0 + 120 * a__1_1 + 30720 * a__2_1 + 7864320 * a__3_1 + 943718400 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * a__0_1)) * (943718400 * rs1_data__0_0 + 120 * a__1_1 + 30720 * a__2_1 + 7864320 * a__3_1 + 943718399 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * a__0_1)) = 0 (7864320 * a__0_2 - (7864320 * b__0_2 + 7864320 * is_valid)) * (7864320 * a__0_2 - (7864320 * b__0_2 + 7864321)) = 0 (30720 * a__0_2 + 7864320 * a__1_2 - (30720 * b__0_2 + 7864320 * b__1_2 + 30720 * is_valid)) * (30720 * a__0_2 + 7864320 * a__1_2 - (30720 * b__0_2 + 7864320 * b__1_2 + 30721)) = 0 (120 * a__0_2 + 30720 * a__1_2 + 7864320 * a__2_2 - (120 * b__0_2 + 30720 * b__1_2 + 7864320 * b__2_2 + 120 * is_valid)) * (120 * a__0_2 + 30720 * a__1_2 + 7864320 * a__2_2 - (120 * b__0_2 + 30720 * b__1_2 + 7864320 * b__2_2 + 121)) = 0 (120 * a__1_2 + 30720 * a__2_2 + 7864320 * a__3_2 + 943718400 * b__0_2 + 943718400 * is_valid - (943718400 * a__0_2 + 120 * b__1_2 + 30720 * b__2_2 + 7864320 * b__3_2)) * (120 * a__1_2 + 30720 * a__2_2 + 7864320 * a__3_2 + 943718400 * b__0_2 + 943718399 - (943718400 * a__0_2 + 120 * b__1_2 + 30720 * b__2_2 + 7864320 * b__3_2)) = 0 flags__0_3 * ((flags__0_3 - 1) * (flags__0_3 - 2)) = 0 flags__1_3 * ((flags__1_3 - 1) * (flags__1_3 - 2)) = 0 flags__2_3 * ((flags__2_3 - 1) * (flags__2_3 - 2)) = 0 flags__3_3 * ((flags__3_3 - 1) * (flags__3_3 - 2)) = 0 (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 1 * is_valid) * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) = 0 
1006632960 * flags__0_3 * (flags__0_3 - 1) + 1006632960 * flags__1_3 * (flags__1_3 - 1) + 1006632960 * flags__2_3 * (flags__2_3 - 1) + 1006632960 * flags__3_3 * (flags__3_3 - 1) + flags__0_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + flags__1_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + flags__2_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) = 0 (1006632960 * flags__0_3 * (flags__0_3 - 1) + 1006632960 * flags__1_3 * (flags__1_3 - 1) + 1006632960 * flags__3_3 * (flags__3_3 - 1)) * read_data__0_3 + flags__0_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) * (255 * data_most_sig_bit_0) + (1006632960 * flags__2_3 * (flags__2_3 - 1) + flags__1_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2)) * (255 * data_most_sig_bit_0) + flags__2_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) * (255 * data_most_sig_bit_0) + (flags__3_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) - (flags__0_3 * flags__1_3 + flags__0_3 * flags__3_3)) * read_data__0_3 + write_data__0_3 - (flags__0_3 * flags__2_3 + flags__1_3 * flags__2_3 + flags__1_3 * flags__3_3 + flags__2_3 * flags__3_3) * prev_data__0_3 = 0 (1006632960 * flags__0_3 * (flags__0_3 - 1) + 1006632960 * flags__1_3 * (flags__1_3 - 1)) * (255 * data_most_sig_bit_0) + 1006632960 * flags__2_3 * (flags__2_3 - 1) * (255 * data_most_sig_bit_0) + (flags__3_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) - flags__0_3 * flags__1_3) * (255 * data_most_sig_bit_0) + write_data__1_3 - (flags__1_3 * flags__2_3 * read_data__0_3 + (flags__0_3 * flags__2_3 + flags__0_3 * flags__3_3 + flags__1_3 * flags__3_3 + flags__2_3 * flags__3_3) * prev_data__1_3) = 0 1006632960 * flags__0_3 * (flags__0_3 - 1) * (255 * data_most_sig_bit_0) + flags__3_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) * (255 * data_most_sig_bit_0) + write_data__2_3 - ((flags__0_3 * flags__2_3 + flags__1_3 * flags__3_3) * read_data__0_3 + (flags__0_3 * flags__1_3 + 
flags__0_3 * flags__3_3 + flags__1_3 * flags__2_3 + flags__2_3 * flags__3_3) * prev_data__2_3) = 0 1006632960 * flags__0_3 * (flags__0_3 - 1) * (255 * data_most_sig_bit_0) + flags__3_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) * (255 * data_most_sig_bit_0) + write_data__3_3 - (flags__2_3 * flags__3_3 * read_data__0_3 + flags__0_3 * flags__2_3 * (255 * data_most_sig_bit_0) + (flags__0_3 * flags__1_3 + flags__0_3 * flags__3_3 + flags__1_3 * flags__2_3 + flags__1_3 * flags__3_3) * prev_data__3_3) = 0 (30720 * mem_ptr_limbs__0_3 - (30720 * b__0_2 + 7864320 * b__1_2)) * (30720 * mem_ptr_limbs__0_3 - (30720 * b__0_2 + 7864320 * b__1_2 + 1)) = 0 (943718400 * b__0_2 + 30720 * mem_ptr_limbs__1_3 - (120 * b__1_2 + 30720 * b__2_2 + 7864320 * b__3_2 + 943718400 * mem_ptr_limbs__0_3)) * (943718400 * b__0_2 + 30720 * mem_ptr_limbs__1_3 - (120 * b__1_2 + 30720 * b__2_2 + 7864320 * b__3_2 + 943718400 * mem_ptr_limbs__0_3 + 1)) = 0 flags__1_3 * (flags__1_3 - 1) + flags__2_3 * (flags__2_3 - 1) + 4 * flags__0_3 * flags__1_3 + 4 * flags__0_3 * flags__2_3 + 5 * flags__0_3 * flags__3_3 + 5 * flags__1_3 * flags__2_3 + 5 * flags__1_3 * flags__3_3 + 5 * flags__2_3 * flags__3_3 - (1006632960 * flags__3_3 * (flags__3_3 - 1) + flags__0_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + flags__1_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + flags__2_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 3 * flags__3_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 5 * is_valid) = 0 (7864320 * a__0_4 + 7864321 * is_valid - 7864320 * writes_aux__prev_data__0_4) * (7864320 * a__0_4 + 7864320 - 7864320 * writes_aux__prev_data__0_4) = 0 (30720 * a__0_4 + 7864320 * a__1_4 + 30721 * is_valid - (30720 * writes_aux__prev_data__0_4 + 7864320 * writes_aux__prev_data__1_4)) * (30720 * a__0_4 + 7864320 * a__1_4 + 30720 - (30720 * writes_aux__prev_data__0_4 + 7864320 * writes_aux__prev_data__1_4)) = 0 (120 * a__0_4 + 30720 * a__1_4 + 7864320 * 
a__2_4 + 121 * is_valid - (120 * writes_aux__prev_data__0_4 + 30720 * writes_aux__prev_data__1_4 + 7864320 * writes_aux__prev_data__2_4)) * (120 * a__0_4 + 30720 * a__1_4 + 7864320 * a__2_4 + 120 - (120 * writes_aux__prev_data__0_4 + 30720 * writes_aux__prev_data__1_4 + 7864320 * writes_aux__prev_data__2_4)) = 0 (943718400 * writes_aux__prev_data__0_4 + 120 * a__1_4 + 30720 * a__2_4 + 7864320 * a__3_4 - (120 * writes_aux__prev_data__1_4 + 30720 * writes_aux__prev_data__2_4 + 7864320 * writes_aux__prev_data__3_4 + 943718400 * a__0_4 + 943718399 * is_valid)) * (943718400 * writes_aux__prev_data__0_4 + 120 * a__1_4 + 30720 * a__2_4 + 7864320 * a__3_4 - (120 * writes_aux__prev_data__1_4 + 30720 * writes_aux__prev_data__2_4 + 7864320 * writes_aux__prev_data__3_4 + 943718400 * a__0_4 + 943718400)) = 0 cmp_result_6 * (cmp_result_6 - 1) = 0 diff_marker__3_6 * (diff_marker__3_6 - 1) = 0 diff_marker__2_6 * (diff_marker__2_6 - 1) = 0 diff_marker__1_6 * (diff_marker__1_6 - 1) = 0 diff_marker__0_6 * (diff_marker__0_6 - 1) = 0 (1 - (diff_marker__0_6 + diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6)) * (a__0_5 * (2 * cmp_result_6 - 1)) = 0 diff_marker__0_6 * (diff_val_6 - a__0_5 * (2 * cmp_result_6 - 1)) = 0 (diff_marker__0_6 + diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6) * (diff_marker__0_6 + diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6 - 1) = 0 (1 - (diff_marker__0_6 + diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6)) * cmp_result_6 = 0 cmp_result_7 * (cmp_result_7 - 1) = 0 diff_marker__3_7 * (diff_marker__3_7 - 1) = 0 (1 - diff_marker__3_7) * (a__3_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__3_7 * (diff_val_7 - a__3_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__2_7 * (diff_marker__2_7 - 1) = 0 (1 - (diff_marker__2_7 + diff_marker__3_7)) * (a__2_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__2_7 * (diff_val_7 - a__2_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__1_7 * (diff_marker__1_7 - 1) = 0 (1 - (diff_marker__1_7 + diff_marker__2_7 + 
diff_marker__3_7)) * (a__1_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__1_7 * (diff_val_7 - a__1_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__0_7 * (diff_marker__0_7 - 1) = 0 (1 - (diff_marker__0_7 + diff_marker__1_7 + diff_marker__2_7 + diff_marker__3_7)) * (a__0_4 * (2 * cmp_result_7 - 1)) = 0 diff_marker__0_7 * (diff_val_7 - a__0_4 * (2 * cmp_result_7 - 1)) = 0 (diff_marker__0_7 + diff_marker__1_7 + diff_marker__2_7 + diff_marker__3_7) * (diff_marker__0_7 + diff_marker__1_7 + diff_marker__2_7 + diff_marker__3_7 - 1) = 0 (1 - (diff_marker__0_7 + diff_marker__1_7 + diff_marker__2_7 + diff_marker__3_7)) * cmp_result_7 = 0 (7864320 * a__0_9 - (7864320 * b__0_5 + 7864320 * is_valid)) * (7864320 * a__0_9 - (7864320 * b__0_5 + 7864321)) = 0 (30720 * a__0_9 + 7864320 * a__1_9 - (30720 * b__0_5 + 7864320 * b__1_5 + 30720 * is_valid)) * (30720 * a__0_9 + 7864320 * a__1_9 - (30720 * b__0_5 + 7864320 * b__1_5 + 30721)) = 0 (120 * a__0_9 + 30720 * a__1_9 + 7864320 * a__2_9 - (120 * b__0_5 + 30720 * b__1_5 + 7864320 * b__2_5 + 120 * is_valid)) * (120 * a__0_9 + 30720 * a__1_9 + 7864320 * a__2_9 - (120 * b__0_5 + 30720 * b__1_5 + 7864320 * b__2_5 + 121)) = 0 (943718400 * b__0_5 + 120 * a__1_9 + 30720 * a__2_9 + 7864320 * a__3_9 + 943718400 * is_valid - (120 * b__1_5 + 30720 * b__2_5 + 7864320 * b__3_5 + 943718400 * a__0_9)) * (943718400 * b__0_5 + 120 * a__1_9 + 30720 * a__2_9 + 7864320 * a__3_9 + 943718399 - (120 * b__1_5 + 30720 * b__2_5 + 7864320 * b__3_5 + 943718400 * a__0_9)) = 0 cmp_result_12 * (cmp_result_12 - 1) = 0 flags__2_3 * (flags__2_3 - 1) - (flags__0_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 2 * flags__1_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2) + 3 * flags__2_3 * (flags__0_3 + flags__1_3 + flags__2_3 + flags__3_3 - 2)) = 0 (1 - cmp_result_12) * (cmp_result_6 * cmp_result_7) = 0 free_var_467 * (cmp_result_6 * cmp_result_7) - cmp_result_12 = 0 opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * 
shifted_read_data__1_0 - read_data__0_3 = 0 diff_val_6 * (diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6) = 0 (1 - is_valid) * (diff_marker__0_6 + diff_marker__1_6 + diff_marker__2_6 + diff_marker__3_6) = 0 (1 - is_valid) * (diff_marker__0_7 + diff_marker__1_7 + diff_marker__2_7 + diff_marker__3_7) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/beqz.txt ================================================ Instructions: 0: BEQ 5 0 8 1 1 APC advantage: - Main columns: 26 -> 12 (2.17x reduction) - Bus interactions: 11 -> 10 (1.10x reduction) - Constraints: 11 -> 4 (2.75x reduction) Symbolic machine using 12 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 cmp_result_0 free_var_28 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 * cmp_result_0 + 4, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * 
reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 cmp_result_0 * (a__0_0 + a__1_0 + a__2_0 + a__3_0) = 0 free_var_28 * (a__0_0 + a__1_0 + a__2_0 + a__3_0) + cmp_result_0 - 1 * is_valid = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/bgez.txt ================================================ Instructions: 0: BGE 5 0 8 1 1 APC advantage: - Main columns: 32 -> 17 (1.88x reduction) - Bus interactions: 13 -> 12 (1.08x reduction) - Constraints: 25 -> 18 (1.39x reduction) Symbolic machine using 17 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 cmp_result_0 a_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 * cmp_result_0 + 4, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, 
args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[a_msb_f_0 + 128, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (a__3_0 - a_msb_f_0) * (a_msb_f_0 + 256 - a__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (a_msb_f_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__3_0 * (a_msb_f_0 * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (a__2_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__2_0 * (a__2_0 * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (a__1_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__1_0 * (a__1_0 * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (a__0_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__0_0 * (a__0_0 * (1 - 2 * cmp_result_0) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 * is_valid - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (1 - cmp_result_0) = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/bgtz.txt ================================================ Instructions: 0: BLT 0 5 8 1 1 APC advantage: - Main columns: 32 -> 17 (1.88x reduction) - Bus interactions: 13 -> 12 
(1.08x reduction) - Constraints: 25 -> 18 (1.39x reduction) Symbolic machine using 17 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 b_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 * cmp_result_0 + 4, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[b_msb_f_0 + 128, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (b__3_0 - b_msb_f_0) * (b_msb_f_0 + 256 - b__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (b_msb_f_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__3_0 * (diff_val_0 - 
b_msb_f_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (b__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (diff_val_0 - b__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (b__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (diff_val_0 - b__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (b__0_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (diff_val_0 - b__0_0 * (2 * cmp_result_0 - 1)) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/blez.txt ================================================ Instructions: 0: BGE 0 5 8 1 1 APC advantage: - Main columns: 32 -> 17 (1.88x reduction) - Bus interactions: 13 -> 12 (1.08x reduction) - Constraints: 25 -> 18 (1.39x reduction) Symbolic machine using 17 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 b_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 * cmp_result_0 + 4, from_state__timestamp_0 + 2] // Bus 1 
(MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[b_msb_f_0 + 128, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (b__3_0 - b_msb_f_0) * (b_msb_f_0 + 256 - b__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (b_msb_f_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__3_0 * (diff_val_0 - b_msb_f_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (b__2_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__2_0 * (diff_val_0 - b__2_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (b__1_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__1_0 * (diff_val_0 - b__1_0 * (1 - 2 * cmp_result_0)) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (b__0_0 * (1 - 2 * cmp_result_0)) = 0 
diff_marker__0_0 * (diff_val_0 - b__0_0 * (1 - 2 * cmp_result_0)) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 * is_valid - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (1 - cmp_result_0) = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/bltz.txt ================================================ Instructions: 0: BLT 5 0 8 1 1 APC advantage: - Main columns: 32 -> 17 (1.88x reduction) - Bus interactions: 13 -> 12 (1.08x reduction) - Constraints: 25 -> 18 (1.39x reduction) Symbolic machine using 17 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 cmp_result_0 a_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 * cmp_result_0 + 4, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 
15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[a_msb_f_0 + 128, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (a__3_0 - a_msb_f_0) * (a_msb_f_0 + 256 - a__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (a_msb_f_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__3_0 * (a_msb_f_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (a__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (a__2_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (a__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (a__1_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (a__0_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (a__0_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/bnez.txt 
================================================ Instructions: 0: BNE 5 0 8 1 1 APC advantage: - Main columns: 26 -> 12 (2.17x reduction) - Bus interactions: 11 -> 10 (1.10x reduction) - Constraints: 11 -> 4 (2.75x reduction) Symbolic machine using 12 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 cmp_result_0 free_var_28 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 * cmp_result_0 + 4, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (1 - cmp_result_0) * (a__0_0 + a__1_0 + a__2_0 + a__3_0) = 0 free_var_28 * (a__0_0 + a__1_0 + a__2_0 + a__3_0) - cmp_result_0 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/j.txt 
================================================ Instructions: 0: JAL 0 0 8 1 0 APC advantage: - Main columns: 18 -> 2 (9.00x reduction) - Bus interactions: 10 -> 2 (5.00x reduction) - Constraints: 9 -> 1 (9.00x reduction) Symbolic machine using 2 unique main columns: inner__from_state__timestamp_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, inner__from_state__timestamp_0] mult=is_valid * 1, args=[8, inner__from_state__timestamp_0 + 1] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/jr.txt ================================================ Instructions: 0: JAL 1 0 8 1 0 APC advantage: - Main columns: 18 -> 2 (9.00x reduction) - Bus interactions: 10 -> 2 (5.00x reduction) - Constraints: 9 -> 1 (9.00x reduction) Symbolic machine using 2 unique main columns: inner__from_state__timestamp_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, inner__from_state__timestamp_0] mult=is_valid * 1, args=[8, inner__from_state__timestamp_0 + 1] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/load_immediate.txt ================================================ Instructions: 0: ADD rd_ptr = 48, rs1_ptr = 0, rs2 = 216, rs2_as = 0 APC advantage: - Main columns: 36 -> 10 (3.60x reduction) - Bus interactions: 20 -> 10 (2.00x reduction) - Constraints: 22 -> 1 (22.00x reduction) Symbolic machine using 10 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] 
mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 48, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 48, 216, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/mv.txt ================================================ Instructions: 0: ADD rd_ptr = 8, rs1_ptr = 5, rs2 = 0, rs2_as = 0 APC advantage: - Main columns: 36 -> 14 (2.57x reduction) - Bus interactions: 20 -> 10 (2.00x reduction) - Constraints: 22 -> 1 (22.00x reduction) Symbolic machine using 14 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, 
from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/neg.txt ================================================ Instructions: 0: SUB rd_ptr = 8, rs1_ptr = 0, rs2 = 5, rs2_as = 1 APC advantage: - Main columns: 36 -> 20 (1.80x reduction) - Bus interactions: 20 -> 16 (1.25x reduction) - Constraints: 22 -> 5 (4.40x reduction) Symbolic machine using 20 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 
c__0_0 c__1_0 c__2_0 c__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, a__3_0, 0, 0] // Algebraic constraints: (7864320 * a__0_0 + 7864320 * c__0_0) * (7864320 * a__0_0 + 7864320 * c__0_0 + 1) = 0 (30720 * a__0_0 + 7864320 * a__1_0 + 30720 * c__0_0 + 7864320 * c__1_0) * (30720 * a__0_0 + 7864320 * a__1_0 + 30720 * 
c__0_0 + 7864320 * c__1_0 + 1) = 0 (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 + 120 * c__0_0 + 30720 * c__1_0 + 7864320 * c__2_0) * (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 + 120 * c__0_0 + 30720 * c__1_0 + 7864320 * c__2_0 + 1) = 0 (943718400 * a__0_0 + 943718400 * c__0_0 - (120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 120 * c__1_0 + 30720 * c__2_0 + 7864320 * c__3_0)) * (943718400 * a__0_0 + 943718400 * c__0_0 - (120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 120 * c__1_0 + 30720 * c__2_0 + 7864320 * c__3_0 + 1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/not.txt ================================================ Instructions: 0: XOR rd_ptr = 8, rs1_ptr = 5, rs2 = 16777215, rs2_as = 0 APC advantage: - Main columns: 36 -> 14 (2.57x reduction) - Bus interactions: 20 -> 10 (2.00x reduction) - Constraints: 22 -> 1 (22.00x reduction) Symbolic machine using 14 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, 255 - a__0_0, 255 - a__1_0, 255 - a__2_0, 255 - a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, 255 - a__0_0, 255 - a__1_0, 255 - a__2_0, 255 - a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, 
a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/ret.txt ================================================ Instructions: 0: JALR 0 1 0 1 0 APC advantage: - Main columns: 28 -> 11 (2.55x reduction) - Bus interactions: 16 -> 8 (2.00x reduction) - Constraints: 9 -> 4 (2.25x reduction) Symbolic machine using 11 unique main columns: from_state__timestamp_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 to_pc_least_sig_bit_0 to_pc_limbs__0_0 to_pc_limbs__1_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[2 * to_pc_limbs__0_0 + 65536 * to_pc_limbs__1_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 1, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 1, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[to_pc_limbs__1_0, 14] mult=is_valid * 1, args=[to_pc_limbs__0_0, 15] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * 
rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] // Algebraic constraints: to_pc_least_sig_bit_0 * (to_pc_least_sig_bit_0 - 1) = 0 (30720 * to_pc_least_sig_bit_0 + 61440 * to_pc_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * to_pc_least_sig_bit_0 + 61440 * to_pc_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 1)) = 0 (943718400 * rs1_data__0_0 + 125829121 * to_pc_limbs__0_0 + 30720 * to_pc_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * to_pc_least_sig_bit_0)) * (943718400 * rs1_data__0_0 + 125829121 * to_pc_limbs__0_0 + 30720 * to_pc_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * to_pc_least_sig_bit_0 + 1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/seqz.txt ================================================ Instructions: 0: SLTU rd_ptr = 8, rs1_ptr = 5, rs2 = 1, rs2_as = 0 APC advantage: - Main columns: 37 -> 16 (2.31x reduction) - Bus interactions: 18 -> 10 (1.80x reduction) - Constraints: 28 -> 4 (7.00x reduction) Symbolic machine using 16 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 inv_of_sum_37 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, 
b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, cmp_result_0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 cmp_result_0 * (b__0_0 + b__1_0 + b__2_0 + b__3_0) = 0 inv_of_sum_37 * (b__0_0 + b__1_0 + b__2_0 + b__3_0) + cmp_result_0 - 1 * is_valid = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/sgtz.txt ================================================ Instructions: 0: SLT rd_ptr = 8, rs1_ptr = 0, rs2 = 5, rs2_as = 1 APC advantage: - Main columns: 37 -> 23 (1.61x reduction) - Bus interactions: 18 -> 16 (1.12x reduction) - Constraints: 28 -> 18 (1.56x reduction) Symbolic machine using 23 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 c__0_0 c__1_0 c__2_0 c__3_0 cmp_result_0 c_msb_f_0 
diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, cmp_result_0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[c_msb_f_0 + 128, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (c__3_0 - c_msb_f_0) * (c_msb_f_0 + 256 - c__3_0) = 0 
diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (c_msb_f_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__3_0 * (diff_val_0 - c_msb_f_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (c__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (diff_val_0 - c__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (c__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (diff_val_0 - c__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (c__0_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (diff_val_0 - c__0_0 * (2 * cmp_result_0 - 1)) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/sltz.txt ================================================ Instructions: 0: SLT rd_ptr = 8, rs1_ptr = 5, rs2 = 0, rs2_as = 1 APC advantage: - Main columns: 37 -> 23 (1.61x reduction) - Bus interactions: 18 -> 16 (1.12x reduction) - Constraints: 28 -> 18 (1.56x reduction) Symbolic machine using 23 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 
writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 b_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, cmp_result_0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[b_msb_f_0 + 128, 0, 
0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (b__3_0 - b_msb_f_0) * (b_msb_f_0 + 256 - b__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (b_msb_f_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__3_0 * (b_msb_f_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (b__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (b__2_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (b__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (b__1_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (b__0_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (b__0_0 * (2 * cmp_result_0 - 1) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/pseudo_instructions/snez.txt ================================================ Instructions: 0: SLTU rd_ptr = 8, rs1_ptr = 0, rs2 = 5, rs2_as = 1 APC advantage: - Main columns: 37 -> 22 (1.68x reduction) - Bus interactions: 18 -> 15 (1.20x reduction) - Constraints: 28 -> 17 (1.65x reduction) Symbolic machine using 22 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 
writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 c__0_0 c__1_0 c__2_0 c__3_0 cmp_result_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, cmp_result_0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + 
diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * (c__3_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__3_0 * (diff_val_0 - c__3_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * (c__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * (diff_val_0 - c__2_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (c__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * (diff_val_0 - c__1_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (c__0_0 * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * (diff_val_0 - c__0_0 * (2 * cmp_result_0 - 1)) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_add_1.txt ================================================ Instructions: 0: ADD rd_ptr = 8, rs1_ptr = 8, rs2 = 1, rs2_as = 0 APC advantage: - Main columns: 36 -> 12 (3.00x reduction) - Bus interactions: 20 -> 8 (2.50x reduction) - Constraints: 22 -> 5 (4.40x reduction) Symbolic machine using 12 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 
writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, a__3_0, 0, 0] // Algebraic constraints: (7864320 * a__0_0 - (7864320 * writes_aux__prev_data__0_0 + 7864320 * is_valid)) * (7864320 * a__0_0 - (7864320 * writes_aux__prev_data__0_0 + 7864321)) = 0 (30720 * a__0_0 + 7864320 * a__1_0 - (30720 * writes_aux__prev_data__0_0 + 7864320 * writes_aux__prev_data__1_0 + 30720 * is_valid)) * (30720 * a__0_0 + 7864320 * a__1_0 - (30720 * writes_aux__prev_data__0_0 + 7864320 * writes_aux__prev_data__1_0 + 30721)) = 0 (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 - (120 * writes_aux__prev_data__0_0 + 30720 * writes_aux__prev_data__1_0 + 7864320 * writes_aux__prev_data__2_0 + 120 * is_valid)) * (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 - (120 * writes_aux__prev_data__0_0 + 30720 * writes_aux__prev_data__1_0 + 7864320 * writes_aux__prev_data__2_0 + 121)) = 0 (943718400 * writes_aux__prev_data__0_0 + 120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 943718400 * is_valid - (120 * writes_aux__prev_data__1_0 + 30720 * writes_aux__prev_data__2_0 + 7864320 * writes_aux__prev_data__3_0 + 943718400 * a__0_0)) * (943718400 * 
writes_aux__prev_data__0_0 + 120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 943718399 - (120 * writes_aux__prev_data__1_0 + 30720 * writes_aux__prev_data__2_0 + 7864320 * writes_aux__prev_data__3_0 + 943718400 * a__0_0)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_and_0.txt ================================================ Instructions: 0: AND rd_ptr = 8, rs1_ptr = 0, rs2 = 5, rs2_as = 0 APC advantage: - Main columns: 36 -> 10 (3.60x reduction) - Bus interactions: 20 -> 10 (2.00x reduction) - Constraints: 22 -> 1 (22.00x reduction) Symbolic machine using 10 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, 0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * 
writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_beq.txt ================================================ Instructions: 0: BEQ 8 5 2 1 1 APC advantage: - Main columns: 26 -> 16 (1.62x reduction) - Bus interactions: 11 -> 10 (1.10x reduction) - Constraints: 11 -> 7 (1.57x reduction) Symbolic machine using 16 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 free_var_30 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 - 2 * cmp_result_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * 
from_state__timestamp_0, 12] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 cmp_result_0 * (a__0_0 - b__0_0) = 0 cmp_result_0 * (a__1_0 - b__1_0) = 0 cmp_result_0 * (a__2_0 - b__2_0) = 0 cmp_result_0 * (a__3_0 - b__3_0) = 0 free_var_30 * ((a__0_0 - b__0_0) * (a__0_0 - b__0_0) + (a__1_0 - b__1_0) * (a__1_0 - b__1_0) + (a__2_0 - b__2_0) * (a__2_0 - b__2_0) + (a__3_0 - b__3_0) * (a__3_0 - b__3_0)) + cmp_result_0 - 1 * is_valid = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_bge.txt ================================================ Instructions: 0: BGE 8 5 2 1 1 APC advantage: - Main columns: 32 -> 22 (1.45x reduction) - Bus interactions: 13 -> 12 (1.08x reduction) - Constraints: 25 -> 19 (1.32x reduction) Symbolic machine using 22 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 a_msb_f_0 b_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 - 2 * cmp_result_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * 
reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[a_msb_f_0 + 128, b_msb_f_0 + 128, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (a__3_0 - a_msb_f_0) * (a_msb_f_0 + 256 - a__3_0) = 0 (b__3_0 - b_msb_f_0) * (b_msb_f_0 + 256 - b__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * ((b_msb_f_0 - a_msb_f_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__3_0 * ((a_msb_f_0 - b_msb_f_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * ((b__2_0 - a__2_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__2_0 * ((a__2_0 - b__2_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__1_0 - a__1_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__1_0 * ((a__1_0 - b__1_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__0_0 - a__0_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__0_0 * ((a__0_0 - b__0_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 * is_valid - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (1 - cmp_result_0) = 
0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_bgeu.txt ================================================ Instructions: 0: BGEU 8 5 2 1 1 APC advantage: - Main columns: 32 -> 20 (1.60x reduction) - Bus interactions: 13 -> 11 (1.18x reduction) - Constraints: 25 -> 17 (1.47x reduction) Symbolic machine using 20 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 - 2 * cmp_result_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * 
from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * ((b__3_0 - a__3_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__3_0 * ((a__3_0 - b__3_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * ((b__2_0 - a__2_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__2_0 * ((a__2_0 - b__2_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__1_0 - a__1_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__1_0 * ((a__1_0 - b__1_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__0_0 - a__0_0) * (1 - 2 * cmp_result_0)) = 0 diff_marker__0_0 * ((a__0_0 - b__0_0) * (1 - 2 * cmp_result_0) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 * is_valid - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * (1 - cmp_result_0) = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_blt.txt ================================================ Instructions: 0: BLT 8 5 2 1 1 APC advantage: - Main columns: 32 -> 22 (1.45x reduction) - Bus interactions: 13 -> 12 (1.08x reduction) - Constraints: 25 -> 19 (1.32x reduction) Symbolic machine using 22 unique main columns: from_state__timestamp_0 
reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 a_msb_f_0 b_msb_f_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 - 2 * cmp_result_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[a_msb_f_0 + 128, b_msb_f_0 + 128, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (a__3_0 - a_msb_f_0) * (a_msb_f_0 + 256 - a__3_0) = 0 (b__3_0 - b_msb_f_0) * (b_msb_f_0 + 256 - b__3_0) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * ((b_msb_f_0 - a_msb_f_0) * (2 * cmp_result_0 - 1)) = 0 
diff_marker__3_0 * ((a_msb_f_0 - b_msb_f_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * ((b__2_0 - a__2_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * ((a__2_0 - b__2_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__1_0 - a__1_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * ((a__1_0 - b__1_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__0_0 * (diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__0_0 - a__0_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * ((a__0_0 - b__0_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_bltu.txt ================================================ Instructions: 0: BLTU 8 5 2 1 1 APC advantage: - Main columns: 32 -> 20 (1.60x reduction) - Bus interactions: 13 -> 11 (1.18x reduction) - Constraints: 25 -> 17 (1.47x reduction) Symbolic machine using 20 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 diff_marker__0_0 diff_marker__1_0 diff_marker__2_0 diff_marker__3_0 diff_val_0 is_valid // Bus 0 (EXECUTION_BRIDGE): 
mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 - 2 * cmp_result_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Bus 6 (BITWISE_LOOKUP): mult=diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0, args=[diff_val_0 - 1, 0, 0, 0] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 diff_marker__3_0 * (diff_marker__3_0 - 1) = 0 (1 - diff_marker__3_0) * ((b__3_0 - a__3_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__3_0 * ((a__3_0 - b__3_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__2_0 * (diff_marker__2_0 - 1) = 0 (1 - (diff_marker__2_0 + diff_marker__3_0)) * ((b__2_0 - a__2_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__2_0 * ((a__2_0 - b__2_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__1_0 * (diff_marker__1_0 - 1) = 0 (1 - (diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__1_0 - a__1_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__1_0 * ((a__1_0 - b__1_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 diff_marker__0_0 * 
(diff_marker__0_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * ((b__0_0 - a__0_0) * (2 * cmp_result_0 - 1)) = 0 diff_marker__0_0 * ((a__0_0 - b__0_0) * (2 * cmp_result_0 - 1) + diff_val_0) = 0 (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0 - 1) = 0 (1 - (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0)) * cmp_result_0 = 0 (1 - is_valid) * (diff_marker__0_0 + diff_marker__1_0 + diff_marker__2_0 + diff_marker__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_bne.txt ================================================ Instructions: 0: BNE 8 5 2 1 1 APC advantage: - Main columns: 26 -> 16 (1.62x reduction) - Bus interactions: 11 -> 10 (1.10x reduction) - Constraints: 11 -> 7 (1.57x reduction) Symbolic machine using 16 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 cmp_result_0 free_var_30 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4 - 2 * cmp_result_0, from_state__timestamp_0 + 2] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, 
args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Algebraic constraints: cmp_result_0 * (cmp_result_0 - 1) = 0 (1 - cmp_result_0) * (a__0_0 - b__0_0) = 0 (1 - cmp_result_0) * (a__1_0 - b__1_0) = 0 (1 - cmp_result_0) * (a__2_0 - b__2_0) = 0 (1 - cmp_result_0) * (a__3_0 - b__3_0) = 0 free_var_30 * ((a__0_0 - b__0_0) * (a__0_0 - b__0_0) + (a__1_0 - b__1_0) * (a__1_0 - b__1_0) + (a__2_0 - b__2_0) * (a__2_0 - b__2_0) + (a__3_0 - b__3_0) * (a__3_0 - b__3_0)) - cmp_result_0 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_div.txt ================================================ Instructions: 0: DIV 8 7 5 1 0 APC advantage: - Main columns: 59 -> 48 (1.23x reduction) - Bus interactions: 25 -> 24 (1.04x reduction) - Constraints: 64 -> 45 (1.42x reduction) Symbolic machine using 48 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 q__0_0 q__1_0 q__2_0 q__3_0 r__0_0 r__1_0 r__2_0 r__3_0 zero_divisor_0 r_zero_0 b_sign_0 c_sign_0 q_sign_0 sign_xor_0 c_sum_inv_0 r_sum_inv_0 r_prime__0_0 r_prime__1_0 
r_prime__2_0 r_prime__3_0 r_inv__0_0 r_inv__1_0 r_inv__2_0 r_inv__3_0 lt_marker__0_0 lt_marker__1_0 lt_marker__2_0 lt_diff_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, q__0_0, q__1_0, q__2_0, q__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * (1 - (zero_divisor_0 + r_zero_0)), args=[lt_diff_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[2 * b__3_0 - 256 * b_sign_0, 2 * c__3_0 - 256 * c_sign_0, 0, 0] // Bus 7 
(TUPLE_RANGE_CHECKER_256_2048): mult=is_valid * 1, args=[q__0_0, 7864320 * b__0_0 - (7864320 * c__0_0 * q__0_0 + 7864320 * r__0_0)] mult=is_valid * 1, args=[q__1_0, 30720 * b__0_0 + 7864320 * b__1_0 - (30720 * c__0_0 * q__0_0 + 7864320 * c__0_0 * q__1_0 + 7864320 * c__1_0 * q__0_0 + 30720 * r__0_0 + 7864320 * r__1_0)] mult=is_valid * 1, args=[q__2_0, 120 * b__0_0 + 30720 * b__1_0 + 7864320 * b__2_0 - (120 * c__0_0 * q__0_0 + 30720 * c__0_0 * q__1_0 + 30720 * c__1_0 * q__0_0 + 7864320 * c__0_0 * q__2_0 + 7864320 * c__1_0 * q__1_0 + 7864320 * c__2_0 * q__0_0 + 120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0)] mult=is_valid * 1, args=[q__3_0, 943718400 * c__0_0 * q__0_0 + 120 * b__1_0 + 30720 * b__2_0 + 7864320 * b__3_0 + 943718400 * r__0_0 - (120 * c__0_0 * q__1_0 + 120 * c__1_0 * q__0_0 + 30720 * c__0_0 * q__2_0 + 30720 * c__1_0 * q__1_0 + 30720 * c__2_0 * q__0_0 + 7864320 * c__0_0 * q__3_0 + 7864320 * c__1_0 * q__2_0 + 7864320 * c__2_0 * q__1_0 + 7864320 * c__3_0 * q__0_0 + 943718400 * b__0_0 + 120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0)] mult=is_valid * 1, args=[r__0_0, 3686400 * c__0_0 * q__0_0 + 943718400 * c__0_0 * q__1_0 + 943718400 * c__1_0 * q__0_0 + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 120 * b__2_0 + 30720 * b__3_0 + 3686400 * r__0_0 + 943718400 * r__1_0 - (120 * c__0_0 * q__2_0 + 120 * c__1_0 * q__1_0 + 120 * c__2_0 * q__0_0 + 30720 * c__0_0 * q__3_0 + 30720 * c__1_0 * q__2_0 + 30720 * c__2_0 * q__1_0 + 30720 * c__3_0 * q__0_0 + 7864320 * c__1_0 * q__3_0 + 7864320 * c__2_0 * q__2_0 + 7864320 * c__3_0 * q__1_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 3686400 * b__0_0 + 943718400 * b__1_0 + 120 * r__2_0 + 30720 * r__3_0 + 7864321 * b_sign_0)] mult=is_valid * 1, args=[r__1_0, 14400 * c__0_0 * q__0_0 + 3686400 * c__0_0 * q__1_0 + 3686400 * c__1_0 * q__0_0 + 943718400 * c__0_0 * q__2_0 + 943718400 * c__1_0 * q__1_0 + 943718400 * c__2_0 * q__0_0 + (30720 * r_zero_0 - 30720) * (255 * b_sign_0) + (7864320 
* r_zero_0 - 7864320) * (255 * b_sign_0) + 120 * b__3_0 + 14400 * r__0_0 + 3686400 * r__1_0 + 943718400 * r__2_0 - (120 * c__0_0 * q__3_0 + 120 * c__1_0 * q__2_0 + 120 * c__2_0 * q__1_0 + 120 * c__3_0 * q__0_0 + 30720 * c__1_0 * q__3_0 + 30720 * c__2_0 * q__2_0 + 30720 * c__3_0 * q__1_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * q__0_0 * (255 * c_sign_0) + 7864320 * c__2_0 * q__3_0 + 7864320 * c__3_0 * q__2_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * q__1_0 * (255 * c_sign_0) + 14400 * b__0_0 + 3686400 * b__1_0 + 943718400 * b__2_0 + 120 * r__3_0 + 30721 * b_sign_0)] mult=is_valid * 1, args=[r__2_0, 14400 * c__0_0 * q__1_0 + 14400 * c__1_0 * q__0_0 + 3686400 * c__0_0 * q__2_0 + 3686400 * c__1_0 * q__1_0 + 3686400 * c__2_0 * q__0_0 + 943718400 * c__0_0 * q__3_0 + 943718400 * c__1_0 * q__2_0 + 943718400 * c__2_0 * q__1_0 + 943718400 * c__3_0 * q__0_0 + (120 * r_zero_0 - 120) * (255 * b_sign_0) + (30720 * r_zero_0 - 30720) * (255 * b_sign_0) + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 503316424 * b__0_0 + 14400 * r__1_0 + 3686400 * r__2_0 + 943718400 * r__3_0 - (503316424 * c__0_0 * q__0_0 + 120 * c__1_0 * q__3_0 + 120 * c__2_0 * q__2_0 + 120 * c__3_0 * q__1_0 + 120 * c__0_0 * (255 * q_sign_0) + 120 * q__0_0 * (255 * c_sign_0) + 30720 * c__2_0 * q__3_0 + 30720 * c__3_0 * q__2_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * q__0_0 * (255 * c_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 30720 * q__1_0 * (255 * c_sign_0) + 7864320 * c__3_0 * q__3_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * q__1_0 * (255 * c_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 7864320 * q__2_0 * (255 * c_sign_0) + 14400 * b__1_0 + 3686400 * b__2_0 + 943718400 * b__3_0 + 503316424 * r__0_0 + 121 * b_sign_0)] mult=is_valid * 1, args=[r__3_0, 14400 * c__0_0 * q__2_0 + 14400 * c__1_0 * q__1_0 + 14400 * c__2_0 * q__0_0 
+ 3686400 * c__0_0 * q__3_0 + 3686400 * c__1_0 * q__2_0 + 3686400 * c__2_0 * q__1_0 + 3686400 * c__3_0 * q__0_0 + 943718400 * c__1_0 * q__3_0 + 943718400 * c__2_0 * q__2_0 + 943718400 * c__3_0 * q__1_0 + 943718400 * c__0_0 * (255 * q_sign_0) + 943718400 * q__0_0 * (255 * c_sign_0) + (943718400 - 943718400 * r_zero_0) * (255 * b_sign_0) + (120 * r_zero_0 - 120) * (255 * b_sign_0) + (30720 * r_zero_0 - 30720) * (255 * b_sign_0) + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 442368000 * b__0_0 + 503316424 * b__1_0 + 14400 * r__2_0 + 3686400 * r__3_0 + 943718399 * b_sign_0 - (442368000 * c__0_0 * q__0_0 + 503316424 * c__0_0 * q__1_0 + 503316424 * c__1_0 * q__0_0 + 120 * c__2_0 * q__3_0 + 120 * c__3_0 * q__2_0 + 120 * c__0_0 * (255 * q_sign_0) + 120 * q__0_0 * (255 * c_sign_0) + 120 * c__1_0 * (255 * q_sign_0) + 120 * q__1_0 * (255 * c_sign_0) + 30720 * c__3_0 * q__3_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * q__0_0 * (255 * c_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 30720 * q__1_0 * (255 * c_sign_0) + 30720 * c__2_0 * (255 * q_sign_0) + 30720 * q__2_0 * (255 * c_sign_0) + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * q__1_0 * (255 * c_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 7864320 * q__2_0 * (255 * c_sign_0) + 7864320 * c__3_0 * (255 * q_sign_0) + 7864320 * q__3_0 * (255 * c_sign_0) + 14400 * b__2_0 + 3686400 * b__3_0 + 442368000 * r__0_0 + 503316424 * r__1_0)] // Algebraic constraints: (zero_divisor_0 + r_zero_0) * (zero_divisor_0 + r_zero_0 - 1) = 0 zero_divisor_0 * (zero_divisor_0 - 1) = 0 zero_divisor_0 * (q__0_0 - 255) = 0 zero_divisor_0 * (q__1_0 - 255) = 0 zero_divisor_0 * (q__2_0 - 255) = 0 zero_divisor_0 * (q__3_0 - 255) = 0 (1 * is_valid - zero_divisor_0) * ((c__0_0 + c__1_0 + c__2_0 + c__3_0) * c_sum_inv_0 - 1) = 0 r_zero_0 * (r_zero_0 - 1) = 0 (1 * is_valid - (zero_divisor_0 + r_zero_0)) * ((r__0_0 + r__1_0 + r__2_0 + r__3_0) * r_sum_inv_0 - 1) = 0 
b_sign_0 * (b_sign_0 - 1) = 0 c_sign_0 * (c_sign_0 - 1) = 0 b_sign_0 + c_sign_0 - (2 * b_sign_0 * c_sign_0 + sign_xor_0) = 0 q_sign_0 * (q_sign_0 - 1) = 0 (q__0_0 + q__1_0 + q__2_0 + q__3_0) * ((1 - zero_divisor_0) * (q_sign_0 - sign_xor_0)) = 0 (q_sign_0 - sign_xor_0) * ((1 - zero_divisor_0) * q_sign_0) = 0 (1 - sign_xor_0) * (r__0_0 - r_prime__0_0) = 0 sign_xor_0 * ((7864320 * r__0_0 + 7864320 * r_prime__0_0) * (7864320 * r__0_0 + 7864320 * r_prime__0_0 + 1)) = 0 sign_xor_0 * ((r_prime__0_0 - 256) * r_inv__0_0 - 1) = 0 sign_xor_0 * ((7864320 * r__0_0 + 7864320 * r_prime__0_0 + 1) * r_prime__0_0) = 0 (1 - sign_xor_0) * (r__1_0 - r_prime__1_0) = 0 sign_xor_0 * ((7833600 * r__0_0 + 7833600 * r_prime__0_0 - (7864320 * r__1_0 + 7864320 * r_prime__1_0)) * (30720 * r__0_0 + 7864320 * r__1_0 + 30720 * r_prime__0_0 + 7864320 * r_prime__1_0 + 1)) = 0 sign_xor_0 * ((r_prime__1_0 - 256) * r_inv__1_0 - 1) = 0 sign_xor_0 * ((30720 * r__0_0 + 7864320 * r__1_0 + 30720 * r_prime__0_0 + 7864320 * r_prime__1_0 + 1) * r_prime__1_0) = 0 (1 - sign_xor_0) * (r__2_0 - r_prime__2_0) = 0 sign_xor_0 * ((30600 * r__0_0 + 7833600 * r__1_0 + 30600 * r_prime__0_0 + 7833600 * r_prime__1_0 - (7864320 * r__2_0 + 7864320 * r_prime__2_0)) * (120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0 + 120 * r_prime__0_0 + 30720 * r_prime__1_0 + 7864320 * r_prime__2_0 + 1)) = 0 sign_xor_0 * ((r_prime__2_0 - 256) * r_inv__2_0 - 1) = 0 sign_xor_0 * ((120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0 + 120 * r_prime__0_0 + 30720 * r_prime__1_0 + 7864320 * r_prime__2_0 + 1) * r_prime__2_0) = 0 (1 - sign_xor_0) * (r__3_0 - r_prime__3_0) = 0 sign_xor_0 * ((943718520 * r__0_0 + 30600 * r__1_0 + 7833600 * r__2_0 + 943718520 * r_prime__0_0 + 30600 * r_prime__1_0 + 7833600 * r_prime__2_0 - (7864320 * r__3_0 + 7864320 * r_prime__3_0)) * (943718400 * r__0_0 + 943718400 * r_prime__0_0 - (120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0 + 120 * r_prime__1_0 + 30720 * r_prime__2_0 + 7864320 * r_prime__3_0 + 1))) = 0 
sign_xor_0 * ((r_prime__3_0 - 256) * r_inv__3_0 - 1) = 0 sign_xor_0 * ((120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0 + 120 * r_prime__1_0 + 30720 * r_prime__2_0 + 7864320 * r_prime__3_0 + 1 - (943718400 * r__0_0 + 943718400 * r_prime__0_0)) * r_prime__3_0) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) = 0 (lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) * (r_prime__3_0 * (2 * c_sign_0 - 1) + c__3_0 * (1 - 2 * c_sign_0)) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (lt_diff_0 - (r_prime__3_0 * (2 * c_sign_0 - 1) + c__3_0 * (1 - 2 * c_sign_0))) = 0 lt_marker__2_0 * (lt_marker__2_0 - 1) = 0 (lt_marker__0_0 + lt_marker__1_0) * (r_prime__2_0 * (2 * c_sign_0 - 1) + c__2_0 * (1 - 2 * c_sign_0)) = 0 lt_marker__2_0 * (lt_diff_0 - (r_prime__2_0 * (2 * c_sign_0 - 1) + c__2_0 * (1 - 2 * c_sign_0))) = 0 lt_marker__1_0 * (lt_marker__1_0 - 1) = 0 lt_marker__0_0 * (r_prime__1_0 * (2 * c_sign_0 - 1) + c__1_0 * (1 - 2 * c_sign_0)) = 0 lt_marker__1_0 * (lt_diff_0 - (r_prime__1_0 * (2 * c_sign_0 - 1) + c__1_0 * (1 - 2 * c_sign_0))) = 0 lt_marker__0_0 * (lt_marker__0_0 - 1) = 0 lt_marker__0_0 * (lt_diff_0 - (r_prime__0_0 * (2 * c_sign_0 - 1) + c__0_0 * (1 - 2 * c_sign_0))) = 0 zero_divisor_0 * (c__0_0 + c__1_0 + c__2_0 + c__3_0) = 0 r_zero_0 * (r__0_0 + r__1_0 + r__2_0 + r__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_divu.txt ================================================ Instructions: 0: DIVU 8 7 5 1 0 APC advantage: - Main columns: 59 -> 37 (1.59x reduction) - Bus interactions: 25 -> 23 (1.09x reduction) - Constraints: 64 -> 25 (2.56x reduction) Symbolic machine using 37 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 
reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 q__0_0 q__1_0 q__2_0 q__3_0 r__0_0 r__1_0 r__2_0 r__3_0 zero_divisor_0 r_zero_0 q_sign_0 c_sum_inv_0 r_sum_inv_0 lt_marker__0_0 lt_marker__1_0 lt_marker__2_0 lt_diff_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, q__0_0, q__1_0, q__2_0, q__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, 
args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * (1 - (zero_divisor_0 + r_zero_0)), args=[lt_diff_0 - 1, 0, 0, 0] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=is_valid * 1, args=[q__0_0, 7864320 * b__0_0 - (7864320 * c__0_0 * q__0_0 + 7864320 * r__0_0)] mult=is_valid * 1, args=[q__1_0, 30720 * b__0_0 + 7864320 * b__1_0 - (30720 * c__0_0 * q__0_0 + 7864320 * c__0_0 * q__1_0 + 7864320 * c__1_0 * q__0_0 + 30720 * r__0_0 + 7864320 * r__1_0)] mult=is_valid * 1, args=[q__2_0, 120 * b__0_0 + 30720 * b__1_0 + 7864320 * b__2_0 - (120 * c__0_0 * q__0_0 + 30720 * c__0_0 * q__1_0 + 30720 * c__1_0 * q__0_0 + 7864320 * c__0_0 * q__2_0 + 7864320 * c__1_0 * q__1_0 + 7864320 * c__2_0 * q__0_0 + 120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0)] mult=is_valid * 1, args=[q__3_0, 943718400 * c__0_0 * q__0_0 + 120 * b__1_0 + 30720 * b__2_0 + 7864320 * b__3_0 + 943718400 * r__0_0 - (120 * c__0_0 * q__1_0 + 120 * c__1_0 * q__0_0 + 30720 * c__0_0 * q__2_0 + 30720 * c__1_0 * q__1_0 + 30720 * c__2_0 * q__0_0 + 7864320 * c__0_0 * q__3_0 + 7864320 * c__1_0 * q__2_0 + 7864320 * c__2_0 * q__1_0 + 7864320 * c__3_0 * q__0_0 + 943718400 * b__0_0 + 120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0)] mult=is_valid * 1, args=[r__0_0, 3686400 * c__0_0 * q__0_0 + 943718400 * c__0_0 * q__1_0 + 943718400 * c__1_0 * q__0_0 + 120 * b__2_0 + 30720 * b__3_0 + 3686400 * r__0_0 + 943718400 * r__1_0 - (120 * c__0_0 * q__2_0 + 120 * c__1_0 * q__1_0 + 120 * c__2_0 * q__0_0 + 30720 * c__0_0 * q__3_0 + 30720 * c__1_0 * q__2_0 + 30720 * c__2_0 * q__1_0 + 30720 * c__3_0 * q__0_0 + 7864320 * c__1_0 * q__3_0 + 7864320 * c__2_0 * q__2_0 + 7864320 * c__3_0 * q__1_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 3686400 * b__0_0 + 943718400 * b__1_0 + 120 * r__2_0 + 30720 * r__3_0)] 
mult=is_valid * 1, args=[r__1_0, 14400 * c__0_0 * q__0_0 + 3686400 * c__0_0 * q__1_0 + 3686400 * c__1_0 * q__0_0 + 943718400 * c__0_0 * q__2_0 + 943718400 * c__1_0 * q__1_0 + 943718400 * c__2_0 * q__0_0 + 120 * b__3_0 + 14400 * r__0_0 + 3686400 * r__1_0 + 943718400 * r__2_0 - (120 * c__0_0 * q__3_0 + 120 * c__1_0 * q__2_0 + 120 * c__2_0 * q__1_0 + 120 * c__3_0 * q__0_0 + 30720 * c__1_0 * q__3_0 + 30720 * c__2_0 * q__2_0 + 30720 * c__3_0 * q__1_0 + 30720 * c__0_0 * (255 * q_sign_0) + 7864320 * c__2_0 * q__3_0 + 7864320 * c__3_0 * q__2_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 14400 * b__0_0 + 3686400 * b__1_0 + 943718400 * b__2_0 + 120 * r__3_0)] mult=is_valid * 1, args=[r__2_0, 14400 * c__0_0 * q__1_0 + 14400 * c__1_0 * q__0_0 + 3686400 * c__0_0 * q__2_0 + 3686400 * c__1_0 * q__1_0 + 3686400 * c__2_0 * q__0_0 + 943718400 * c__0_0 * q__3_0 + 943718400 * c__1_0 * q__2_0 + 943718400 * c__2_0 * q__1_0 + 943718400 * c__3_0 * q__0_0 + 503316424 * b__0_0 + 14400 * r__1_0 + 3686400 * r__2_0 + 943718400 * r__3_0 - (503316424 * c__0_0 * q__0_0 + 120 * c__1_0 * q__3_0 + 120 * c__2_0 * q__2_0 + 120 * c__3_0 * q__1_0 + 120 * c__0_0 * (255 * q_sign_0) + 30720 * c__2_0 * q__3_0 + 30720 * c__3_0 * q__2_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 7864320 * c__3_0 * q__3_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 14400 * b__1_0 + 3686400 * b__2_0 + 943718400 * b__3_0 + 503316424 * r__0_0)] mult=is_valid * 1, args=[r__3_0, 14400 * c__0_0 * q__2_0 + 14400 * c__1_0 * q__1_0 + 14400 * c__2_0 * q__0_0 + 3686400 * c__0_0 * q__3_0 + 3686400 * c__1_0 * q__2_0 + 3686400 * c__2_0 * q__1_0 + 3686400 * c__3_0 * q__0_0 + 943718400 * c__1_0 * q__3_0 + 943718400 * c__2_0 * q__2_0 + 943718400 * c__3_0 * q__1_0 + 943718400 * c__0_0 * (255 * q_sign_0) + 442368000 * b__0_0 + 503316424 * b__1_0 + 14400 * r__2_0 + 3686400 * r__3_0 - (442368000 * c__0_0 * q__0_0 
+ 503316424 * c__0_0 * q__1_0 + 503316424 * c__1_0 * q__0_0 + 120 * c__2_0 * q__3_0 + 120 * c__3_0 * q__2_0 + 120 * c__0_0 * (255 * q_sign_0) + 120 * c__1_0 * (255 * q_sign_0) + 30720 * c__3_0 * q__3_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 30720 * c__2_0 * (255 * q_sign_0) + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 7864320 * c__3_0 * (255 * q_sign_0) + 14400 * b__2_0 + 3686400 * b__3_0 + 442368000 * r__0_0 + 503316424 * r__1_0)] // Algebraic constraints: (zero_divisor_0 + r_zero_0) * (zero_divisor_0 + r_zero_0 - 1) = 0 zero_divisor_0 * (zero_divisor_0 - 1) = 0 zero_divisor_0 * (q__0_0 - 255) = 0 zero_divisor_0 * (q__1_0 - 255) = 0 zero_divisor_0 * (q__2_0 - 255) = 0 zero_divisor_0 * (q__3_0 - 255) = 0 (1 * is_valid - zero_divisor_0) * ((c__0_0 + c__1_0 + c__2_0 + c__3_0) * c_sum_inv_0 - 1) = 0 r_zero_0 * (r_zero_0 - 1) = 0 (1 * is_valid - (zero_divisor_0 + r_zero_0)) * ((r__0_0 + r__1_0 + r__2_0 + r__3_0) * r_sum_inv_0 - 1) = 0 q_sign_0 * (q_sign_0 - 1) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) = 0 (lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) * (c__3_0 - r__3_0) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (r__3_0 + lt_diff_0 - c__3_0) = 0 lt_marker__2_0 * (lt_marker__2_0 - 1) = 0 (lt_marker__0_0 + lt_marker__1_0) * (c__2_0 - r__2_0) = 0 lt_marker__2_0 * (r__2_0 + lt_diff_0 - c__2_0) = 0 lt_marker__1_0 * (lt_marker__1_0 - 1) = 0 lt_marker__0_0 * (c__1_0 - r__1_0) = 0 lt_marker__1_0 * (r__1_0 + lt_diff_0 - c__1_0) = 0 lt_marker__0_0 * (lt_marker__0_0 - 1) = 0 lt_marker__0_0 * (r__0_0 + lt_diff_0 - c__0_0) = 0 q_sign_0 * (1 - zero_divisor_0) = 0 zero_divisor_0 * (c__0_0 + c__1_0 + c__2_0 + c__3_0) = 0 r_zero_0 * (r__0_0 + r__1_0 + r__2_0 + r__3_0) = 0 is_valid * (is_valid - 1) = 0 
================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadb.txt ================================================ Instructions: 0: LOADB rd_rs2_ptr = 8, rs1_ptr = 2, imm = 3, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 36 -> 25 (1.44x reduction) - Bus interactions: 18 -> 17 (1.06x reduction) - Constraints: 18 -> 6 (3.00x reduction) Symbolic machine using 25 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 opcode_loadb_flag0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 shifted_read_data__2_0 shifted_read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - 
shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * shifted_read_data__1_0, 255 * data_most_sig_bit_0, 255 * data_most_sig_bit_0, 255 * data_most_sig_bit_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__0_0 * opcode_loadb_flag0_0 + shifted_read_data__1_0 * (1 - opcode_loadb_flag0_0) - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 + 503316480 - (503316480 * mem_ptr_limbs__0_0 + 503316480 * opcode_loadb_flag0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, 
args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: opcode_loadb_flag0_0 * (opcode_loadb_flag0_0 - 1) = 0 data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 92160 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 92161)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 817889279 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 817889278 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadb_imm0.txt ================================================ Instructions: 0: LOADB rd_rs2_ptr = 8, rs1_ptr = 2, imm = 0, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 36 -> 25 (1.44x reduction) - Bus interactions: 18 -> 17 (1.06x reduction) - Constraints: 18 -> 6 (3.00x reduction) Symbolic machine using 25 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 opcode_loadb_flag0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 
shifted_read_data__2_0 shifted_read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, opcode_loadb_flag0_0 * shifted_read_data__0_0 + (1 - opcode_loadb_flag0_0) * shifted_read_data__1_0, 255 * data_most_sig_bit_0, 255 * data_most_sig_bit_0, 255 * 
data_most_sig_bit_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__0_0 * opcode_loadb_flag0_0 + shifted_read_data__1_0 * (1 - opcode_loadb_flag0_0) - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 + 503316480 - (503316480 * mem_ptr_limbs__0_0 + 503316480 * opcode_loadb_flag0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: opcode_loadb_flag0_0 * (opcode_loadb_flag0_0 - 1) = 0 data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 1)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 1)) = 0 is_valid * (is_valid - 1) = 0 
================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadb_x0.txt ================================================ Instructions: 0: LOADB rd_rs2_ptr = 0, rs1_ptr = 2, imm = 3, mem_as = 2, needs_write = 0, imm_sign = 0 APC advantage: - Main columns: 36 -> 19 (1.89x reduction) - Bus interactions: 18 -> 13 (1.38x reduction) - Constraints: 18 -> 6 (3.00x reduction) Symbolic machine using 19 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 opcode_loadb_flag0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 shifted_read_data__2_0 shifted_read_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 + opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 
+ opcode_loadb_flag0_0 - (2 * shift_most_sig_bit_0 + 1), shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__0_0 * opcode_loadb_flag0_0 + shifted_read_data__1_0 * (1 - opcode_loadb_flag0_0) - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 + 503316480 - (503316480 * mem_ptr_limbs__0_0 + 503316480 * opcode_loadb_flag0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Algebraic constraints: opcode_loadb_flag0_0 * (opcode_loadb_flag0_0 - 1) = 0 data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 92160 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 92161)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 817889279 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * 
rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 817889278 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadbu.txt ================================================ Instructions: 0: LOADBU rd_rs2_ptr = 8, rs1_ptr = 2, imm = 21, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 41 -> 27 (1.52x reduction) - Bus interactions: 17 -> 16 (1.06x reduction) - Constraints: 25 -> 15 (1.67x reduction) Symbolic machine using 27 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 flags__0_0 flags__1_0 flags__2_0 flags__3_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 write_data__0_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 2 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - flags__2_0 * (flags__2_0 - 1), 
read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 2 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - flags__2_0 * (flags__2_0 - 1), read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, write_data__0_0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[503316480 * flags__2_0 * (flags__2_0 - 1) + 503316481 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316480 * flags__1_0 * flags__2_0 + 1006632960 * flags__0_0 * flags__2_0 + 1006632960 * flags__1_0 * flags__3_0 - (503316480 * flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1006632960 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316481 * flags__2_0 * flags__3_0 + 503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * 
write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: flags__0_0 * ((flags__0_0 - 1) * (flags__0_0 - 2)) = 0 flags__1_0 * ((flags__1_0 - 1) * (flags__1_0 - 2)) = 0 flags__2_0 * ((flags__2_0 - 1) * (flags__2_0 - 2)) = 0 flags__3_0 * ((flags__3_0 - 1) * (flags__3_0 - 2)) = 0 (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 1 * is_valid) * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__2_0 * (flags__2_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1 * is_valid = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1)) * read_data__0_0 + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__1_0 + (1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2)) * read_data__2_0 + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0)) * read_data__0_0 + write_data__0_0 - (flags__0_0 * flags__2_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__0_0 = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1)) * read_data__1_0 + 1006632960 * flags__2_0 * (flags__2_0 - 1) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - flags__0_0 * flags__1_0) * read_data__1_0 - (flags__1_0 * flags__2_0 * 
read_data__0_0 + (flags__0_0 * flags__2_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__1_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__2_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__2_0 - ((flags__0_0 * flags__2_0 + flags__1_0 * flags__3_0) * read_data__0_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__2_0 * flags__3_0) * prev_data__2_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__3_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 - (flags__2_0 * flags__3_0 * read_data__0_0 + flags__0_0 * flags__2_0 * read_data__1_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0) * prev_data__3_0) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 645120 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 645121)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 314572810 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 314572811)) = 0 flags__1_0 * (flags__1_0 - 1) + flags__2_0 * (flags__2_0 - 1) + 4 * flags__0_0 * flags__1_0 + 4 * flags__0_0 * flags__2_0 + 5 * flags__0_0 * flags__3_0 + 5 * flags__1_0 * flags__2_0 + 5 * flags__1_0 * flags__3_0 + 5 * flags__2_0 * flags__3_0 - (1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1 
* is_valid) = 0 flags__1_0 * flags__2_0 + 2 * flags__0_0 * flags__2_0 + 2 * flags__1_0 * flags__3_0 + 3 * flags__2_0 * flags__3_0 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadh.txt ================================================ Instructions: 0: LOADH rd_rs2_ptr = 8, rs1_ptr = 2, imm = 6, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 36 -> 24 (1.50x reduction) - Bus interactions: 18 -> 17 (1.06x reduction) - Constraints: 18 -> 5 (3.60x reduction) Symbolic machine using 24 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 shift_most_sig_bit_0 data_most_sig_bit_0 shifted_read_data__0_0 shifted_read_data__1_0 shifted_read_data__2_0 shifted_read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - 2 * shift_most_sig_bit_0, shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - 
shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - 2 * shift_most_sig_bit_0, shift_most_sig_bit_0 * shifted_read_data__2_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__0_0, shift_most_sig_bit_0 * shifted_read_data__3_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__1_0, shift_most_sig_bit_0 * shifted_read_data__0_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__2_0, shift_most_sig_bit_0 * shifted_read_data__1_0 + (1 - shift_most_sig_bit_0) * shifted_read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, shifted_read_data__0_0, shifted_read_data__1_0, 255 * data_most_sig_bit_0, 255 * data_most_sig_bit_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[shifted_read_data__1_0 - 128 * data_most_sig_bit_0, 7] mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[1006632960 * shift_most_sig_bit_0 - 503316480 * mem_ptr_limbs__0_0, 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * 
write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: data_most_sig_bit_0 * (data_most_sig_bit_0 - 1) = 0 shift_most_sig_bit_0 * (shift_most_sig_bit_0 - 1) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 184320 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 184321)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 377487363 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 377487364)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadhu.txt ================================================ Instructions: 0: LOADHU rd_rs2_ptr = 0, rs1_ptr = 2, imm = 22, mem_as = 2, needs_write = 0, imm_sign = 0 APC advantage: - Main columns: 41 -> 18 (2.28x reduction) - Bus interactions: 17 -> 12 (1.42x reduction) - Constraints: 25 -> 9 (2.78x reduction) Symbolic machine using 18 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 flags__1_0 flags__2_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] 
mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, 2 * flags__1_0 * (flags__1_0 + flags__2_0 - 2) + 3 * flags__2_0 * (flags__1_0 + flags__2_0 - 2) + mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - flags__2_0 * (flags__2_0 - 1), read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, 2 * flags__1_0 * (flags__1_0 + flags__2_0 - 2) + 3 * flags__2_0 * (flags__1_0 + flags__2_0 - 2) + mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - flags__2_0 * (flags__2_0 - 1), read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[503316480 * flags__2_0 * (flags__2_0 - 1) + 503316481 * flags__2_0 * (flags__1_0 + flags__2_0 - 2) + 503316480 * flags__1_0 * flags__2_0 - (1006632960 * flags__1_0 * (flags__1_0 + flags__2_0 - 2) + 503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] // Algebraic constraints: flags__1_0 * ((flags__1_0 - 1) * (flags__1_0 - 2)) = 0 flags__2_0 * ((flags__2_0 - 1) * (flags__2_0 - 2)) = 0 (flags__1_0 + flags__2_0 - 1 * is_valid) * (flags__1_0 + flags__2_0 - 2) = 0 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__1_0 + flags__2_0 - 2) + flags__2_0 * (flags__1_0 + flags__2_0 - 2) + 1 
* is_valid = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 675840 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 675841)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 629145590 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 629145589 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) = 0 flags__1_0 * (flags__1_0 - 1) + flags__2_0 * (flags__2_0 - 1) + 5 * flags__1_0 * flags__2_0 - (flags__1_0 * (flags__1_0 + flags__2_0 - 2) + flags__2_0 * (flags__1_0 + flags__2_0 - 2) + 2 * is_valid) = 0 flags__1_0 * flags__2_0 = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_loadw.txt ================================================ Instructions: 0: LOADW rd_rs2_ptr = 8, rs1_ptr = 2, imm = 20, mem_as = 2, needs_write = 1, imm_sign = 0 APC advantage: - Main columns: 41 -> 22 (1.86x reduction) - Bus interactions: 17 -> 16 (1.06x reduction) - Constraints: 25 -> 3 (8.33x reduction) Symbolic machine using 22 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): 
mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 614400 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 
+ 614401)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 754974711 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 754974710 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_mul.txt ================================================ Instructions: 0: MUL 8 7 5 1 0 APC advantage: - Main columns: 31 -> 24 (1.29x reduction) - Bus interactions: 19 -> 18 (1.06x reduction) - Constraints: 4 -> 1 (4.00x reduction) Symbolic machine using 24 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, 
writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=is_valid * 1, args=[a__0_0, 7864320 * a__0_0 - 7864320 * b__0_0 * c__0_0] mult=is_valid * 1, args=[a__1_0, 30720 * a__0_0 + 7864320 * a__1_0 - (30720 * b__0_0 * c__0_0 + 7864320 * b__0_0 * c__1_0 + 7864320 * b__1_0 * c__0_0)] mult=is_valid * 1, args=[a__2_0, 120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 - (120 * b__0_0 * c__0_0 + 30720 * b__0_0 * c__1_0 + 30720 * b__1_0 * c__0_0 + 7864320 * b__0_0 * c__2_0 + 7864320 * b__1_0 * c__1_0 + 7864320 * b__2_0 * c__0_0)] mult=is_valid * 1, args=[a__3_0, 943718400 * b__0_0 * c__0_0 + 120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 - (120 * b__0_0 * c__1_0 + 120 * b__1_0 * c__0_0 + 30720 * b__0_0 * c__2_0 + 30720 * b__1_0 * c__1_0 + 30720 * b__2_0 * c__0_0 + 7864320 * b__0_0 * c__3_0 + 7864320 * b__1_0 * c__2_0 + 7864320 * b__2_0 * c__1_0 + 7864320 * b__3_0 * c__0_0 + 943718400 * a__0_0)] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: 
openvm-riscv/tests/apc_snapshots/single_instructions/single_rem.txt ================================================ Instructions: 0: REM 8 7 5 1 0 APC advantage: - Main columns: 59 -> 48 (1.23x reduction) - Bus interactions: 25 -> 24 (1.04x reduction) - Constraints: 64 -> 45 (1.42x reduction) Symbolic machine using 48 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 q__0_0 q__1_0 q__2_0 q__3_0 r__0_0 r__1_0 r__2_0 r__3_0 zero_divisor_0 r_zero_0 b_sign_0 c_sign_0 q_sign_0 sign_xor_0 c_sum_inv_0 r_sum_inv_0 r_prime__0_0 r_prime__1_0 r_prime__2_0 r_prime__3_0 r_inv__0_0 r_inv__1_0 r_inv__2_0 r_inv__3_0 lt_marker__0_0 lt_marker__1_0 lt_marker__2_0 lt_diff_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, r__0_0, r__1_0, r__2_0, r__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, 
args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * (1 - (zero_divisor_0 + r_zero_0)), args=[lt_diff_0 - 1, 0, 0, 0] mult=is_valid * 1, args=[2 * b__3_0 - 256 * b_sign_0, 2 * c__3_0 - 256 * c_sign_0, 0, 0] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=is_valid * 1, args=[q__0_0, 7864320 * b__0_0 - (7864320 * c__0_0 * q__0_0 + 7864320 * r__0_0)] mult=is_valid * 1, args=[q__1_0, 30720 * b__0_0 + 7864320 * b__1_0 - (30720 * c__0_0 * q__0_0 + 7864320 * c__0_0 * q__1_0 + 7864320 * c__1_0 * q__0_0 + 30720 * r__0_0 + 7864320 * r__1_0)] mult=is_valid * 1, args=[q__2_0, 120 * b__0_0 + 30720 * b__1_0 + 7864320 * b__2_0 - (120 * c__0_0 * q__0_0 + 30720 * c__0_0 * q__1_0 + 30720 * c__1_0 * q__0_0 + 7864320 * c__0_0 * q__2_0 + 7864320 * c__1_0 * q__1_0 + 7864320 * c__2_0 * q__0_0 + 120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0)] mult=is_valid * 1, args=[q__3_0, 943718400 * c__0_0 * q__0_0 + 120 * b__1_0 + 30720 * b__2_0 + 7864320 * b__3_0 + 943718400 * r__0_0 - (120 * c__0_0 * q__1_0 + 120 * c__1_0 * q__0_0 + 30720 * c__0_0 * q__2_0 + 30720 * c__1_0 * q__1_0 + 30720 * c__2_0 * q__0_0 + 7864320 * c__0_0 * q__3_0 + 7864320 * c__1_0 * q__2_0 + 7864320 * c__2_0 * q__1_0 + 7864320 * c__3_0 * q__0_0 + 943718400 * b__0_0 + 120 * 
r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0)] mult=is_valid * 1, args=[r__0_0, 3686400 * c__0_0 * q__0_0 + 943718400 * c__0_0 * q__1_0 + 943718400 * c__1_0 * q__0_0 + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 120 * b__2_0 + 30720 * b__3_0 + 3686400 * r__0_0 + 943718400 * r__1_0 - (120 * c__0_0 * q__2_0 + 120 * c__1_0 * q__1_0 + 120 * c__2_0 * q__0_0 + 30720 * c__0_0 * q__3_0 + 30720 * c__1_0 * q__2_0 + 30720 * c__2_0 * q__1_0 + 30720 * c__3_0 * q__0_0 + 7864320 * c__1_0 * q__3_0 + 7864320 * c__2_0 * q__2_0 + 7864320 * c__3_0 * q__1_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 3686400 * b__0_0 + 943718400 * b__1_0 + 120 * r__2_0 + 30720 * r__3_0 + 7864321 * b_sign_0)] mult=is_valid * 1, args=[r__1_0, 14400 * c__0_0 * q__0_0 + 3686400 * c__0_0 * q__1_0 + 3686400 * c__1_0 * q__0_0 + 943718400 * c__0_0 * q__2_0 + 943718400 * c__1_0 * q__1_0 + 943718400 * c__2_0 * q__0_0 + (30720 * r_zero_0 - 30720) * (255 * b_sign_0) + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 120 * b__3_0 + 14400 * r__0_0 + 3686400 * r__1_0 + 943718400 * r__2_0 - (120 * c__0_0 * q__3_0 + 120 * c__1_0 * q__2_0 + 120 * c__2_0 * q__1_0 + 120 * c__3_0 * q__0_0 + 30720 * c__1_0 * q__3_0 + 30720 * c__2_0 * q__2_0 + 30720 * c__3_0 * q__1_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * q__0_0 * (255 * c_sign_0) + 7864320 * c__2_0 * q__3_0 + 7864320 * c__3_0 * q__2_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * q__1_0 * (255 * c_sign_0) + 14400 * b__0_0 + 3686400 * b__1_0 + 943718400 * b__2_0 + 120 * r__3_0 + 30721 * b_sign_0)] mult=is_valid * 1, args=[r__2_0, 14400 * c__0_0 * q__1_0 + 14400 * c__1_0 * q__0_0 + 3686400 * c__0_0 * q__2_0 + 3686400 * c__1_0 * q__1_0 + 3686400 * c__2_0 * q__0_0 + 943718400 * c__0_0 * q__3_0 + 943718400 * c__1_0 * q__2_0 + 943718400 * c__2_0 * q__1_0 + 943718400 * c__3_0 * q__0_0 + (120 * r_zero_0 - 120) * (255 * b_sign_0) + (30720 * r_zero_0 - 
30720) * (255 * b_sign_0) + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 503316424 * b__0_0 + 14400 * r__1_0 + 3686400 * r__2_0 + 943718400 * r__3_0 - (503316424 * c__0_0 * q__0_0 + 120 * c__1_0 * q__3_0 + 120 * c__2_0 * q__2_0 + 120 * c__3_0 * q__1_0 + 120 * c__0_0 * (255 * q_sign_0) + 120 * q__0_0 * (255 * c_sign_0) + 30720 * c__2_0 * q__3_0 + 30720 * c__3_0 * q__2_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * q__0_0 * (255 * c_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 30720 * q__1_0 * (255 * c_sign_0) + 7864320 * c__3_0 * q__3_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * q__1_0 * (255 * c_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 7864320 * q__2_0 * (255 * c_sign_0) + 14400 * b__1_0 + 3686400 * b__2_0 + 943718400 * b__3_0 + 503316424 * r__0_0 + 121 * b_sign_0)] mult=is_valid * 1, args=[r__3_0, 14400 * c__0_0 * q__2_0 + 14400 * c__1_0 * q__1_0 + 14400 * c__2_0 * q__0_0 + 3686400 * c__0_0 * q__3_0 + 3686400 * c__1_0 * q__2_0 + 3686400 * c__2_0 * q__1_0 + 3686400 * c__3_0 * q__0_0 + 943718400 * c__1_0 * q__3_0 + 943718400 * c__2_0 * q__2_0 + 943718400 * c__3_0 * q__1_0 + 943718400 * c__0_0 * (255 * q_sign_0) + 943718400 * q__0_0 * (255 * c_sign_0) + (943718400 - 943718400 * r_zero_0) * (255 * b_sign_0) + (120 * r_zero_0 - 120) * (255 * b_sign_0) + (30720 * r_zero_0 - 30720) * (255 * b_sign_0) + (7864320 * r_zero_0 - 7864320) * (255 * b_sign_0) + 442368000 * b__0_0 + 503316424 * b__1_0 + 14400 * r__2_0 + 3686400 * r__3_0 + 943718399 * b_sign_0 - (442368000 * c__0_0 * q__0_0 + 503316424 * c__0_0 * q__1_0 + 503316424 * c__1_0 * q__0_0 + 120 * c__2_0 * q__3_0 + 120 * c__3_0 * q__2_0 + 120 * c__0_0 * (255 * q_sign_0) + 120 * q__0_0 * (255 * c_sign_0) + 120 * c__1_0 * (255 * q_sign_0) + 120 * q__1_0 * (255 * c_sign_0) + 30720 * c__3_0 * q__3_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * q__0_0 * (255 * c_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 30720 * q__1_0 * 
(255 * c_sign_0) + 30720 * c__2_0 * (255 * q_sign_0) + 30720 * q__2_0 * (255 * c_sign_0) + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * q__0_0 * (255 * c_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * q__1_0 * (255 * c_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 7864320 * q__2_0 * (255 * c_sign_0) + 7864320 * c__3_0 * (255 * q_sign_0) + 7864320 * q__3_0 * (255 * c_sign_0) + 14400 * b__2_0 + 3686400 * b__3_0 + 442368000 * r__0_0 + 503316424 * r__1_0)] // Algebraic constraints: (zero_divisor_0 + r_zero_0) * (zero_divisor_0 + r_zero_0 - 1) = 0 zero_divisor_0 * (zero_divisor_0 - 1) = 0 zero_divisor_0 * (q__0_0 - 255) = 0 zero_divisor_0 * (q__1_0 - 255) = 0 zero_divisor_0 * (q__2_0 - 255) = 0 zero_divisor_0 * (q__3_0 - 255) = 0 (1 * is_valid - zero_divisor_0) * ((c__0_0 + c__1_0 + c__2_0 + c__3_0) * c_sum_inv_0 - 1) = 0 r_zero_0 * (r_zero_0 - 1) = 0 (1 * is_valid - (zero_divisor_0 + r_zero_0)) * ((r__0_0 + r__1_0 + r__2_0 + r__3_0) * r_sum_inv_0 - 1) = 0 b_sign_0 * (b_sign_0 - 1) = 0 c_sign_0 * (c_sign_0 - 1) = 0 b_sign_0 + c_sign_0 - (2 * b_sign_0 * c_sign_0 + sign_xor_0) = 0 q_sign_0 * (q_sign_0 - 1) = 0 (q__0_0 + q__1_0 + q__2_0 + q__3_0) * ((1 - zero_divisor_0) * (q_sign_0 - sign_xor_0)) = 0 (q_sign_0 - sign_xor_0) * ((1 - zero_divisor_0) * q_sign_0) = 0 (1 - sign_xor_0) * (r__0_0 - r_prime__0_0) = 0 sign_xor_0 * ((7864320 * r__0_0 + 7864320 * r_prime__0_0) * (7864320 * r__0_0 + 7864320 * r_prime__0_0 + 1)) = 0 sign_xor_0 * ((r_prime__0_0 - 256) * r_inv__0_0 - 1) = 0 sign_xor_0 * ((7864320 * r__0_0 + 7864320 * r_prime__0_0 + 1) * r_prime__0_0) = 0 (1 - sign_xor_0) * (r__1_0 - r_prime__1_0) = 0 sign_xor_0 * ((7833600 * r__0_0 + 7833600 * r_prime__0_0 - (7864320 * r__1_0 + 7864320 * r_prime__1_0)) * (30720 * r__0_0 + 7864320 * r__1_0 + 30720 * r_prime__0_0 + 7864320 * r_prime__1_0 + 1)) = 0 sign_xor_0 * ((r_prime__1_0 - 256) * r_inv__1_0 - 1) = 0 sign_xor_0 * ((30720 * r__0_0 + 7864320 * r__1_0 + 30720 * r_prime__0_0 + 7864320 * r_prime__1_0 + 1) * 
r_prime__1_0) = 0 (1 - sign_xor_0) * (r__2_0 - r_prime__2_0) = 0 sign_xor_0 * ((30600 * r__0_0 + 7833600 * r__1_0 + 30600 * r_prime__0_0 + 7833600 * r_prime__1_0 - (7864320 * r__2_0 + 7864320 * r_prime__2_0)) * (120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0 + 120 * r_prime__0_0 + 30720 * r_prime__1_0 + 7864320 * r_prime__2_0 + 1)) = 0 sign_xor_0 * ((r_prime__2_0 - 256) * r_inv__2_0 - 1) = 0 sign_xor_0 * ((120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0 + 120 * r_prime__0_0 + 30720 * r_prime__1_0 + 7864320 * r_prime__2_0 + 1) * r_prime__2_0) = 0 (1 - sign_xor_0) * (r__3_0 - r_prime__3_0) = 0 sign_xor_0 * ((943718520 * r__0_0 + 30600 * r__1_0 + 7833600 * r__2_0 + 943718520 * r_prime__0_0 + 30600 * r_prime__1_0 + 7833600 * r_prime__2_0 - (7864320 * r__3_0 + 7864320 * r_prime__3_0)) * (943718400 * r__0_0 + 943718400 * r_prime__0_0 - (120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0 + 120 * r_prime__1_0 + 30720 * r_prime__2_0 + 7864320 * r_prime__3_0 + 1))) = 0 sign_xor_0 * ((r_prime__3_0 - 256) * r_inv__3_0 - 1) = 0 sign_xor_0 * ((120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0 + 120 * r_prime__1_0 + 30720 * r_prime__2_0 + 7864320 * r_prime__3_0 + 1 - (943718400 * r__0_0 + 943718400 * r_prime__0_0)) * r_prime__3_0) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) = 0 (lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) * (r_prime__3_0 * (2 * c_sign_0 - 1) + c__3_0 * (1 - 2 * c_sign_0)) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (lt_diff_0 - (r_prime__3_0 * (2 * c_sign_0 - 1) + c__3_0 * (1 - 2 * c_sign_0))) = 0 lt_marker__2_0 * (lt_marker__2_0 - 1) = 0 (lt_marker__0_0 + lt_marker__1_0) * (r_prime__2_0 * (2 * c_sign_0 - 1) + c__2_0 * (1 - 2 * c_sign_0)) = 0 lt_marker__2_0 * (lt_diff_0 - (r_prime__2_0 * (2 * c_sign_0 - 1) + c__2_0 * (1 - 2 * c_sign_0))) = 0 lt_marker__1_0 * (lt_marker__1_0 - 1) = 0 
lt_marker__0_0 * (r_prime__1_0 * (2 * c_sign_0 - 1) + c__1_0 * (1 - 2 * c_sign_0)) = 0 lt_marker__1_0 * (lt_diff_0 - (r_prime__1_0 * (2 * c_sign_0 - 1) + c__1_0 * (1 - 2 * c_sign_0))) = 0 lt_marker__0_0 * (lt_marker__0_0 - 1) = 0 lt_marker__0_0 * (lt_diff_0 - (r_prime__0_0 * (2 * c_sign_0 - 1) + c__0_0 * (1 - 2 * c_sign_0))) = 0 zero_divisor_0 * (c__0_0 + c__1_0 + c__2_0 + c__3_0) = 0 r_zero_0 * (r__0_0 + r__1_0 + r__2_0 + r__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_remu.txt ================================================ Instructions: 0: REMU 8 7 5 1 0 APC advantage: - Main columns: 59 -> 37 (1.59x reduction) - Bus interactions: 25 -> 23 (1.09x reduction) - Constraints: 64 -> 25 (2.56x reduction) Symbolic machine using 37 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 q__0_0 q__1_0 q__2_0 q__3_0 r__0_0 r__1_0 r__2_0 r__3_0 zero_divisor_0 r_zero_0 q_sign_0 c_sum_inv_0 r_sum_inv_0 lt_marker__0_0 lt_marker__1_0 lt_marker__2_0 lt_diff_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, 
c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, r__0_0, r__1_0, r__2_0, r__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * (1 - (zero_divisor_0 + r_zero_0)), args=[lt_diff_0 - 1, 0, 0, 0] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=is_valid * 1, args=[q__0_0, 7864320 * b__0_0 - (7864320 * c__0_0 * q__0_0 + 7864320 * r__0_0)] mult=is_valid * 1, args=[q__1_0, 30720 * b__0_0 + 7864320 * b__1_0 - (30720 * c__0_0 * q__0_0 + 7864320 * c__0_0 * q__1_0 + 7864320 * c__1_0 * q__0_0 + 30720 * r__0_0 + 7864320 * r__1_0)] mult=is_valid * 1, args=[q__2_0, 120 * b__0_0 + 30720 * b__1_0 + 7864320 * b__2_0 - (120 * c__0_0 * q__0_0 + 30720 * c__0_0 * q__1_0 + 30720 * c__1_0 * q__0_0 + 7864320 * c__0_0 * q__2_0 + 7864320 * c__1_0 * q__1_0 + 7864320 * c__2_0 * q__0_0 + 120 * r__0_0 + 30720 * r__1_0 + 7864320 * r__2_0)] mult=is_valid * 1, args=[q__3_0, 943718400 * c__0_0 * q__0_0 + 120 * b__1_0 + 30720 * b__2_0 + 7864320 * b__3_0 
+ 943718400 * r__0_0 - (120 * c__0_0 * q__1_0 + 120 * c__1_0 * q__0_0 + 30720 * c__0_0 * q__2_0 + 30720 * c__1_0 * q__1_0 + 30720 * c__2_0 * q__0_0 + 7864320 * c__0_0 * q__3_0 + 7864320 * c__1_0 * q__2_0 + 7864320 * c__2_0 * q__1_0 + 7864320 * c__3_0 * q__0_0 + 943718400 * b__0_0 + 120 * r__1_0 + 30720 * r__2_0 + 7864320 * r__3_0)] mult=is_valid * 1, args=[r__0_0, 3686400 * c__0_0 * q__0_0 + 943718400 * c__0_0 * q__1_0 + 943718400 * c__1_0 * q__0_0 + 120 * b__2_0 + 30720 * b__3_0 + 3686400 * r__0_0 + 943718400 * r__1_0 - (120 * c__0_0 * q__2_0 + 120 * c__1_0 * q__1_0 + 120 * c__2_0 * q__0_0 + 30720 * c__0_0 * q__3_0 + 30720 * c__1_0 * q__2_0 + 30720 * c__2_0 * q__1_0 + 30720 * c__3_0 * q__0_0 + 7864320 * c__1_0 * q__3_0 + 7864320 * c__2_0 * q__2_0 + 7864320 * c__3_0 * q__1_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 3686400 * b__0_0 + 943718400 * b__1_0 + 120 * r__2_0 + 30720 * r__3_0)] mult=is_valid * 1, args=[r__1_0, 14400 * c__0_0 * q__0_0 + 3686400 * c__0_0 * q__1_0 + 3686400 * c__1_0 * q__0_0 + 943718400 * c__0_0 * q__2_0 + 943718400 * c__1_0 * q__1_0 + 943718400 * c__2_0 * q__0_0 + 120 * b__3_0 + 14400 * r__0_0 + 3686400 * r__1_0 + 943718400 * r__2_0 - (120 * c__0_0 * q__3_0 + 120 * c__1_0 * q__2_0 + 120 * c__2_0 * q__1_0 + 120 * c__3_0 * q__0_0 + 30720 * c__1_0 * q__3_0 + 30720 * c__2_0 * q__2_0 + 30720 * c__3_0 * q__1_0 + 30720 * c__0_0 * (255 * q_sign_0) + 7864320 * c__2_0 * q__3_0 + 7864320 * c__3_0 * q__2_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 14400 * b__0_0 + 3686400 * b__1_0 + 943718400 * b__2_0 + 120 * r__3_0)] mult=is_valid * 1, args=[r__2_0, 14400 * c__0_0 * q__1_0 + 14400 * c__1_0 * q__0_0 + 3686400 * c__0_0 * q__2_0 + 3686400 * c__1_0 * q__1_0 + 3686400 * c__2_0 * q__0_0 + 943718400 * c__0_0 * q__3_0 + 943718400 * c__1_0 * q__2_0 + 943718400 * c__2_0 * q__1_0 + 943718400 * c__3_0 * q__0_0 + 503316424 * b__0_0 + 14400 * r__1_0 + 3686400 * r__2_0 + 943718400 * r__3_0 - (503316424 * c__0_0 * q__0_0 + 120 * 
c__1_0 * q__3_0 + 120 * c__2_0 * q__2_0 + 120 * c__3_0 * q__1_0 + 120 * c__0_0 * (255 * q_sign_0) + 30720 * c__2_0 * q__3_0 + 30720 * c__3_0 * q__2_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 7864320 * c__3_0 * q__3_0 + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 14400 * b__1_0 + 3686400 * b__2_0 + 943718400 * b__3_0 + 503316424 * r__0_0)] mult=is_valid * 1, args=[r__3_0, 14400 * c__0_0 * q__2_0 + 14400 * c__1_0 * q__1_0 + 14400 * c__2_0 * q__0_0 + 3686400 * c__0_0 * q__3_0 + 3686400 * c__1_0 * q__2_0 + 3686400 * c__2_0 * q__1_0 + 3686400 * c__3_0 * q__0_0 + 943718400 * c__1_0 * q__3_0 + 943718400 * c__2_0 * q__2_0 + 943718400 * c__3_0 * q__1_0 + 943718400 * c__0_0 * (255 * q_sign_0) + 442368000 * b__0_0 + 503316424 * b__1_0 + 14400 * r__2_0 + 3686400 * r__3_0 - (442368000 * c__0_0 * q__0_0 + 503316424 * c__0_0 * q__1_0 + 503316424 * c__1_0 * q__0_0 + 120 * c__2_0 * q__3_0 + 120 * c__3_0 * q__2_0 + 120 * c__0_0 * (255 * q_sign_0) + 120 * c__1_0 * (255 * q_sign_0) + 30720 * c__3_0 * q__3_0 + 30720 * c__0_0 * (255 * q_sign_0) + 30720 * c__1_0 * (255 * q_sign_0) + 30720 * c__2_0 * (255 * q_sign_0) + 7864320 * c__0_0 * (255 * q_sign_0) + 7864320 * c__1_0 * (255 * q_sign_0) + 7864320 * c__2_0 * (255 * q_sign_0) + 7864320 * c__3_0 * (255 * q_sign_0) + 14400 * b__2_0 + 3686400 * b__3_0 + 442368000 * r__0_0 + 503316424 * r__1_0)] // Algebraic constraints: (zero_divisor_0 + r_zero_0) * (zero_divisor_0 + r_zero_0 - 1) = 0 zero_divisor_0 * (zero_divisor_0 - 1) = 0 zero_divisor_0 * (q__0_0 - 255) = 0 zero_divisor_0 * (q__1_0 - 255) = 0 zero_divisor_0 * (q__2_0 - 255) = 0 zero_divisor_0 * (q__3_0 - 255) = 0 (1 * is_valid - zero_divisor_0) * ((c__0_0 + c__1_0 + c__2_0 + c__3_0) * c_sum_inv_0 - 1) = 0 r_zero_0 * (r_zero_0 - 1) = 0 (1 * is_valid - (zero_divisor_0 + r_zero_0)) * ((r__0_0 + r__1_0 + r__2_0 + r__3_0) * r_sum_inv_0 - 1) = 0 q_sign_0 * (q_sign_0 - 1) = 0 (1 - 
(zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) = 0 (lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0) * (c__3_0 - r__3_0) = 0 (1 - (zero_divisor_0 + r_zero_0 + lt_marker__0_0 + lt_marker__1_0 + lt_marker__2_0)) * (r__3_0 + lt_diff_0 - c__3_0) = 0 lt_marker__2_0 * (lt_marker__2_0 - 1) = 0 (lt_marker__0_0 + lt_marker__1_0) * (c__2_0 - r__2_0) = 0 lt_marker__2_0 * (r__2_0 + lt_diff_0 - c__2_0) = 0 lt_marker__1_0 * (lt_marker__1_0 - 1) = 0 lt_marker__0_0 * (c__1_0 - r__1_0) = 0 lt_marker__1_0 * (r__1_0 + lt_diff_0 - c__1_0) = 0 lt_marker__0_0 * (lt_marker__0_0 - 1) = 0 lt_marker__0_0 * (r__0_0 + lt_diff_0 - c__0_0) = 0 q_sign_0 * (1 - zero_divisor_0) = 0 zero_divisor_0 * (c__0_0 + c__1_0 + c__2_0 + c__3_0) = 0 r_zero_0 * (r__0_0 + r__1_0 + r__2_0 + r__3_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_sll.txt ================================================ Instructions: 0: SLL rd_ptr = 68, rs1_ptr = 40, rs2 = 3, rs2_as = 0 APC advantage: - Main columns: 53 -> 18 (2.94x reduction) - Bus interactions: 24 -> 16 (1.50x reduction) - Constraints: 76 -> 1 (76.00x reduction) Symbolic machine using 18 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 40, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid 
* 1, args=[1, 40, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 68, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 68, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[7864320 * a__0_0 - 62914560 * b__0_0, 3] mult=is_valid * 1, args=[30720 * a__0_0 + 7864320 * a__1_0 - (245760 * b__0_0 + 62914560 * b__1_0), 3] mult=is_valid * 1, args=[120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 - (960 * b__0_0 + 245760 * b__1_0 + 62914560 * b__2_0), 3] mult=is_valid * 1, args=[120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 - (943718400 * a__0_0 + 503316484 * b__0_0 + 960 * b__1_0 + 245760 * b__2_0 + 62914560 * b__3_0), 3] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, a__3_0, 0, 0] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_sll_by_8.txt ================================================ Instructions: 0: SLL rd_ptr = 68, rs1_ptr = 40, rs2 = 8, rs2_as = 0 APC advantage: - Main columns: 53 -> 14 (3.79x reduction) - Bus interactions: 24 -> 10 (2.40x reduction) - Constraints: 76 -> 1 (76.00x reduction) Symbolic machine using 14 unique 
main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__1_0 a__2_0 a__3_0 b__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 40, a__1_0, a__2_0, a__3_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 40, a__1_0, a__2_0, a__3_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 68, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 68, 0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_sra.txt ================================================ Instructions: 0: SRA rd_ptr = 68, rs1_ptr = 40, rs2 = 3, rs2_as = 1 APC advantage: - Main columns: 53 -> 40 (1.32x reduction) - Bus interactions: 24 -> 22 (1.09x reduction) - Constraints: 76 -> 35 (2.17x reduction) 
Symbolic machine using 40 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 bit_multiplier_right_0 b_sign_0 bit_shift_marker__0_0 bit_shift_marker__1_0 bit_shift_marker__2_0 bit_shift_marker__3_0 bit_shift_marker__4_0 bit_shift_marker__5_0 bit_shift_marker__6_0 limb_shift_marker__0_0 limb_shift_marker__1_0 limb_shift_marker__2_0 bit_shift_carry__0_0 bit_shift_carry__1_0 bit_shift_carry__2_0 bit_shift_carry__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 40, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 40, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 3, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 3, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 68, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 68, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[bit_shift_carry__0_0, 7 - (7 * bit_shift_marker__0_0 + 6 * bit_shift_marker__1_0 + 5 * bit_shift_marker__2_0 + 4 * bit_shift_marker__3_0 + 3 * bit_shift_marker__4_0 + 2 * bit_shift_marker__5_0 + bit_shift_marker__6_0)] mult=is_valid * 
1, args=[bit_shift_carry__1_0, 7 - (7 * bit_shift_marker__0_0 + 6 * bit_shift_marker__1_0 + 5 * bit_shift_marker__2_0 + 4 * bit_shift_marker__3_0 + 3 * bit_shift_marker__4_0 + 2 * bit_shift_marker__5_0 + bit_shift_marker__6_0)] mult=is_valid * 1, args=[bit_shift_carry__2_0, 7 - (7 * bit_shift_marker__0_0 + 6 * bit_shift_marker__1_0 + 5 * bit_shift_marker__2_0 + 4 * bit_shift_marker__3_0 + 3 * bit_shift_marker__4_0 + 2 * bit_shift_marker__5_0 + bit_shift_marker__6_0)] mult=is_valid * 1, args=[bit_shift_carry__3_0, 7 - (7 * bit_shift_marker__0_0 + 6 * bit_shift_marker__1_0 + 5 * bit_shift_marker__2_0 + 4 * bit_shift_marker__3_0 + 3 * bit_shift_marker__4_0 + 2 * bit_shift_marker__5_0 + bit_shift_marker__6_0)] mult=is_valid * 1, args=[503316481 * limb_shift_marker__0_0 - (62914560 * c__0_0 + 440401920 * bit_shift_marker__0_0 + 377487360 * bit_shift_marker__1_0 + 314572800 * bit_shift_marker__2_0 + 251658240 * bit_shift_marker__3_0 + 188743680 * bit_shift_marker__4_0 + 125829120 * bit_shift_marker__5_0 + 62914560 * bit_shift_marker__6_0 + 1006632960 * limb_shift_marker__1_0 + 503316480 * limb_shift_marker__2_0 + 62914561), 3] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[b__3_0, 128, 
b__3_0 + 128 - 256 * b_sign_0, 1] mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, a__3_0, 0, 0] // Algebraic constraints: bit_shift_marker__0_0 * (bit_shift_marker__0_0 - 1) = 0 bit_shift_marker__0_0 * (bit_multiplier_right_0 - 1) = 0 bit_shift_marker__1_0 * (bit_shift_marker__1_0 - 1) = 0 bit_shift_marker__1_0 * (bit_multiplier_right_0 - 2) = 0 bit_shift_marker__2_0 * (bit_shift_marker__2_0 - 1) = 0 bit_shift_marker__2_0 * (bit_multiplier_right_0 - 4) = 0 bit_shift_marker__3_0 * (bit_shift_marker__3_0 - 1) = 0 bit_shift_marker__3_0 * (bit_multiplier_right_0 - 8) = 0 bit_shift_marker__4_0 * (bit_shift_marker__4_0 - 1) = 0 bit_shift_marker__4_0 * (bit_multiplier_right_0 - 16) = 0 bit_shift_marker__5_0 * (bit_shift_marker__5_0 - 1) = 0 bit_shift_marker__5_0 * (bit_multiplier_right_0 - 32) = 0 bit_shift_marker__6_0 * (bit_shift_marker__6_0 - 1) = 0 bit_shift_marker__6_0 * (bit_multiplier_right_0 - 64) = 0 (1 - (bit_shift_marker__0_0 + bit_shift_marker__1_0 + bit_shift_marker__2_0 + bit_shift_marker__3_0 + bit_shift_marker__4_0 + bit_shift_marker__5_0 + bit_shift_marker__6_0)) * (bit_shift_marker__0_0 + bit_shift_marker__1_0 + bit_shift_marker__2_0 + bit_shift_marker__3_0 + bit_shift_marker__4_0 + bit_shift_marker__5_0 + bit_shift_marker__6_0) = 0 (1 * is_valid - (bit_shift_marker__0_0 + bit_shift_marker__1_0 + bit_shift_marker__2_0 + bit_shift_marker__3_0 + bit_shift_marker__4_0 + bit_shift_marker__5_0 + bit_shift_marker__6_0)) * (bit_multiplier_right_0 - 128) = 0 limb_shift_marker__0_0 * (limb_shift_marker__0_0 - 1) = 0 limb_shift_marker__0_0 * (a__0_0 * bit_multiplier_right_0 + bit_shift_carry__0_0 - (b__0_0 + 256 * bit_shift_carry__1_0)) = 0 limb_shift_marker__0_0 * (a__1_0 * bit_multiplier_right_0 + bit_shift_carry__1_0 - (b__1_0 + 256 * bit_shift_carry__2_0)) = 0 limb_shift_marker__0_0 * (a__2_0 * bit_multiplier_right_0 + bit_shift_carry__2_0 - (b__2_0 + 256 * bit_shift_carry__3_0)) = 0 limb_shift_marker__0_0 * (a__3_0 * 
bit_multiplier_right_0 + bit_shift_carry__3_0 - (256 * b_sign_0 * (bit_multiplier_right_0 - 1) + b__3_0)) = 0 limb_shift_marker__1_0 * (limb_shift_marker__1_0 - 1) = 0 limb_shift_marker__1_0 * (a__0_0 * bit_multiplier_right_0 + bit_shift_carry__1_0 - (b__1_0 + 256 * bit_shift_carry__2_0)) = 0 limb_shift_marker__1_0 * (a__1_0 * bit_multiplier_right_0 + bit_shift_carry__2_0 - (b__2_0 + 256 * bit_shift_carry__3_0)) = 0 limb_shift_marker__1_0 * (a__2_0 * bit_multiplier_right_0 + bit_shift_carry__3_0 - (256 * b_sign_0 * (bit_multiplier_right_0 - 1) + b__3_0)) = 0 limb_shift_marker__2_0 * (limb_shift_marker__2_0 - 1) = 0 limb_shift_marker__2_0 * (a__0_0 * bit_multiplier_right_0 + bit_shift_carry__2_0 - (b__2_0 + 256 * bit_shift_carry__3_0)) = 0 limb_shift_marker__2_0 * (a__1_0 * bit_multiplier_right_0 + bit_shift_carry__3_0 - (256 * b_sign_0 * (bit_multiplier_right_0 - 1) + b__3_0)) = 0 (1 - (limb_shift_marker__0_0 + limb_shift_marker__1_0 + limb_shift_marker__2_0)) * (limb_shift_marker__0_0 + limb_shift_marker__1_0 + limb_shift_marker__2_0) = 0 (1 - (limb_shift_marker__0_0 + limb_shift_marker__1_0 + limb_shift_marker__2_0)) * (a__0_0 * bit_multiplier_right_0 + bit_shift_carry__3_0 - (256 * b_sign_0 * (bit_multiplier_right_0 - 1) + b__3_0)) = 0 (1 - (limb_shift_marker__0_0 + limb_shift_marker__1_0 + limb_shift_marker__2_0)) * (a__1_0 - 255 * b_sign_0) = 0 b_sign_0 * (b_sign_0 - 1) = 0 (a__2_0 - 255 * b_sign_0) * (1 - (limb_shift_marker__0_0 + limb_shift_marker__1_0)) = 0 (a__3_0 - 255 * b_sign_0) * (1 - limb_shift_marker__0_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_srl.txt ================================================ Instructions: 0: SRL rd_ptr = 68, rs1_ptr = 40, rs2 = 25, rs2_as = 0 APC advantage: - Main columns: 53 -> 15 (3.53x reduction) - Bus interactions: 24 -> 11 (2.18x reduction) - Constraints: 76 -> 2 (38.00x reduction) Symbolic machine using 15 
unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 b__0_0 b__1_0 b__2_0 b__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 40, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 40, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 68, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 68, a__0_0, 0, 0, 0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_0, 0, 0, 0] // Algebraic constraints: (b__3_0 - 2 * a__0_0) * (b__3_0 - (2 * a__0_0 + 1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_storeb.txt ================================================ Instructions: 0: STOREB rd_rs2_ptr = 8, rs1_ptr = 2, imm = 3, mem_as = 2, needs_write 
= 1, imm_sign = 0 APC advantage: - Main columns: 41 -> 30 (1.37x reduction) - Bus interactions: 17 -> 16 (1.06x reduction) - Constraints: 25 -> 15 (1.67x reduction) Symbolic machine using 30 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 flags__0_0 flags__1_0 flags__2_0 flags__3_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 write_data__0_0 write_data__1_0 write_data__2_0 write_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_0 * flags__2_0 + 2 * flags__0_0 * flags__2_0 + 2 * flags__1_0 * flags__3_0 + 3 * flags__2_0 * flags__3_0), prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_0 * flags__2_0 + 2 * flags__0_0 * flags__2_0 + 2 * flags__1_0 * flags__3_0 + 3 * flags__2_0 * flags__3_0), 
write_data__0_0, write_data__1_0, write_data__2_0, write_data__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[503316480 * flags__2_0 * (flags__2_0 - 1) + 503316481 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316480 * flags__1_0 * flags__2_0 + 1006632960 * flags__0_0 * flags__2_0 + 1006632960 * flags__1_0 * flags__3_0 - (503316480 * flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 1006632960 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 503316481 * flags__2_0 * flags__3_0 + 503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: flags__0_0 * ((flags__0_0 - 1) * (flags__0_0 - 2)) = 0 flags__1_0 * ((flags__1_0 - 1) * (flags__1_0 - 2)) = 0 flags__2_0 * ((flags__2_0 - 1) * (flags__2_0 - 2)) = 0 flags__3_0 * ((flags__3_0 - 1) * (flags__3_0 - 2)) = 0 (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 1 * is_valid) * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__2_0 * (flags__2_0 - 1) + 
1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__3_0 * (flags__3_0 - 1)) * read_data__0_0 + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__1_0 + (1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2)) * read_data__2_0 + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0)) * read_data__0_0 + write_data__0_0 - (flags__0_0 * flags__2_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__0_0 = 0 (1006632960 * flags__0_0 * (flags__0_0 - 1) + 1006632960 * flags__1_0 * (flags__1_0 - 1)) * read_data__1_0 + 1006632960 * flags__2_0 * (flags__2_0 - 1) * read_data__3_0 + (flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) - flags__0_0 * flags__1_0) * read_data__1_0 + write_data__1_0 - (flags__1_0 * flags__2_0 * read_data__0_0 + (flags__0_0 * flags__2_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__3_0 + flags__2_0 * flags__3_0) * prev_data__1_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__2_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * read_data__2_0 + write_data__2_0 - ((flags__0_0 * flags__2_0 + flags__1_0 * flags__3_0) * read_data__0_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__2_0 * flags__3_0) * prev_data__2_0) = 0 1006632960 * flags__0_0 * (flags__0_0 - 1) * read_data__3_0 + flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) * 
read_data__3_0 + write_data__3_0 - (flags__2_0 * flags__3_0 * read_data__0_0 + flags__0_0 * flags__2_0 * read_data__1_0 + (flags__0_0 * flags__1_0 + flags__0_0 * flags__3_0 + flags__1_0 * flags__2_0 + flags__1_0 * flags__3_0) * prev_data__3_0) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 92160 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 92161)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 817889279 * is_valid - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 + 817889278 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0)) = 0 flags__1_0 * (flags__1_0 - 1) + flags__2_0 * (flags__2_0 - 1) + 4 * flags__0_0 * flags__1_0 + 4 * flags__0_0 * flags__2_0 + 5 * flags__0_0 * flags__3_0 + 5 * flags__1_0 * flags__2_0 + 5 * flags__1_0 * flags__3_0 + 5 * flags__2_0 * flags__3_0 - (1006632960 * flags__3_0 * (flags__3_0 - 1) + flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__3_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 5 * is_valid) = 0 flags__2_0 * (flags__2_0 - 1) - (flags__0_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 2 * flags__1_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2) + 3 * flags__2_0 * (flags__0_0 + flags__1_0 + flags__2_0 + flags__3_0 - 2)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_storeh.txt ================================================ Instructions: 0: STOREH rd_rs2_ptr = 8, rs1_ptr = 2, imm = 6, mem_as = 2, needs_write = 1, imm_sign = 1 APC advantage: - Main 
columns: 41 -> 28 (1.46x reduction) - Bus interactions: 17 -> 16 (1.06x reduction) - Constraints: 25 -> 13 (1.92x reduction) Symbolic machine using 28 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 flags__1_0 flags__2_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 write_data__0_0 write_data__1_0 write_data__2_0 write_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_0 * flags__2_0 + 2 * flags__2_0), prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0 - (flags__1_0 * flags__2_0 + 2 * flags__2_0), write_data__0_0, write_data__1_0, write_data__2_0, write_data__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 
17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[503316480 * flags__2_0 * (flags__2_0 - 1) + 503316481 * flags__2_0 * (flags__1_0 + flags__2_0 - 1) + 503316480 * flags__1_0 * flags__2_0 + 503316480 * flags__2_0 + 503316480 - (1006632960 * flags__1_0 * (flags__1_0 + flags__2_0 - 1) + 503316480 * mem_ptr_limbs__0_0 + 503316480 * flags__1_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: flags__1_0 * ((flags__1_0 - 1) * (flags__1_0 - 2)) = 0 flags__2_0 * ((flags__2_0 - 1) * (flags__2_0 - 2)) = 0 (flags__1_0 + flags__2_0) * (flags__1_0 + flags__2_0 - 1) = 0 1006632960 * flags__1_0 * (flags__1_0 - 1) + 1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__1_0 + flags__2_0 - 1) + flags__2_0 * (flags__1_0 + flags__2_0 - 1) + flags__1_0 + flags__2_0 - 1 * is_valid = 0 1006632960 * flags__1_0 * (flags__1_0 - 1) * read_data__0_0 + (flags__1_0 + flags__2_0 - 1) * read_data__1_0 + (1006632960 * flags__2_0 * (flags__2_0 - 1) + flags__1_0 * (flags__1_0 + flags__2_0 - 1)) * read_data__2_0 + flags__2_0 * (flags__1_0 + flags__2_0 - 1) * read_data__3_0 + write_data__0_0 - (flags__1_0 * read_data__0_0 + (flags__1_0 * flags__2_0 + flags__2_0) * prev_data__0_0) = 0 1006632960 * flags__1_0 * (flags__1_0 - 1) * read_data__1_0 + 1006632960 * flags__2_0 * (flags__2_0 - 1) 
* read_data__3_0 + write_data__1_0 - (flags__1_0 * flags__2_0 * read_data__0_0 + flags__1_0 * read_data__1_0 + flags__2_0 * prev_data__1_0) = 0 write_data__2_0 - (flags__2_0 * read_data__0_0 + (flags__1_0 * flags__2_0 + flags__1_0) * prev_data__2_0) = 0 write_data__3_0 - (flags__2_0 * read_data__1_0 + (flags__1_0 * flags__2_0 + flags__1_0) * prev_data__3_0) = 0 (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 184320 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 184321)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 377456642 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 377456643)) = 0 flags__1_0 * (flags__1_0 - 1) + flags__2_0 * (flags__2_0 - 1) + 5 * flags__1_0 * flags__2_0 + 3 * flags__1_0 + 3 * flags__2_0 - (flags__1_0 * (flags__1_0 + flags__2_0 - 1) + flags__2_0 * (flags__1_0 + flags__2_0 - 1) + 3 * is_valid) = 0 flags__2_0 * (flags__2_0 - 1) + 1 * is_valid - (2 * flags__1_0 * (flags__1_0 + flags__2_0 - 1) + 3 * flags__2_0 * (flags__1_0 + flags__2_0 - 1) + flags__1_0 + flags__2_0) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_storew.txt ================================================ Instructions: 0: STOREW rd_rs2_ptr = 8, rs1_ptr = 2, imm = 4, mem_as = 2, needs_write = 1, imm_sign = 1 APC advantage: - Main columns: 41 -> 22 (1.86x reduction) - Bus interactions: 17 -> 16 (1.06x reduction) - Constraints: 25 -> 3 (8.33x reduction) Symbolic machine using 22 unique main columns: from_state__timestamp_0 rs1_data__0_0 rs1_data__1_0 rs1_data__2_0 rs1_data__3_0 rs1_aux_cols__base__prev_timestamp_0 
rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 read_data_aux__base__prev_timestamp_0 read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 mem_ptr_limbs__0_0 mem_ptr_limbs__1_0 write_base_aux__prev_timestamp_0 write_base_aux__timestamp_lt_aux__lower_decomp__0_0 read_data__0_0 read_data__1_0 read_data__2_0 read_data__3_0 prev_data__0_0 prev_data__1_0 prev_data__2_0 prev_data__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, rs1_aux_cols__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 2, rs1_data__0_0, rs1_data__1_0, rs1_data__2_0, rs1_data__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, read_data_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, prev_data__0_0, prev_data__1_0, prev_data__2_0, prev_data__3_0, write_base_aux__prev_timestamp_0] mult=is_valid * 1, args=[2, mem_ptr_limbs__0_0 + 65536 * mem_ptr_limbs__1_0, read_data__0_0, read_data__1_0, read_data__2_0, read_data__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * rs1_aux_cols__base__prev_timestamp_0 + 15360 * rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[-(503316480 * mem_ptr_limbs__0_0), 14] mult=is_valid * 1, args=[mem_ptr_limbs__1_0, 13] mult=is_valid * 1, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * read_data_aux__base__prev_timestamp_0 + 15360 * 
read_data_aux__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * write_base_aux__prev_timestamp_0 + 15360 * write_base_aux__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Algebraic constraints: (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 122880 * is_valid)) * (30720 * mem_ptr_limbs__0_0 - (30720 * rs1_data__0_0 + 7864320 * rs1_data__1_0 + 122881)) = 0 (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 251627521 * is_valid)) * (943718400 * rs1_data__0_0 + 30720 * mem_ptr_limbs__1_0 - (120 * rs1_data__1_0 + 30720 * rs1_data__2_0 + 7864320 * rs1_data__3_0 + 943718400 * mem_ptr_limbs__0_0 + 251627522)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_sub.txt ================================================ Instructions: 0: SUB rd_ptr = 8, rs1_ptr = 7, rs2 = 5, rs2_as = 1 APC advantage: - Main columns: 36 -> 24 (1.50x reduction) - Bus interactions: 20 -> 16 (1.25x reduction) - Constraints: 22 -> 5 (4.40x reduction) Symbolic machine using 24 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] 
mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_0, a__1_0, 0, 0] mult=is_valid * 1, args=[a__2_0, a__3_0, 0, 0] // Algebraic constraints: (7864320 * b__0_0 - (7864320 * a__0_0 + 7864320 * c__0_0)) * (7864320 * b__0_0 - (7864320 * a__0_0 + 7864320 * c__0_0 + 1)) = 0 (30720 * b__0_0 + 7864320 * b__1_0 - (30720 * a__0_0 + 7864320 * a__1_0 + 30720 * c__0_0 + 7864320 * c__1_0)) * (30720 * b__0_0 + 7864320 * b__1_0 - (30720 * 
a__0_0 + 7864320 * a__1_0 + 30720 * c__0_0 + 7864320 * c__1_0 + 1)) = 0 (120 * b__0_0 + 30720 * b__1_0 + 7864320 * b__2_0 - (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 + 120 * c__0_0 + 30720 * c__1_0 + 7864320 * c__2_0)) * (120 * b__0_0 + 30720 * b__1_0 + 7864320 * b__2_0 - (120 * a__0_0 + 30720 * a__1_0 + 7864320 * a__2_0 + 120 * c__0_0 + 30720 * c__1_0 + 7864320 * c__2_0 + 1)) = 0 (943718400 * a__0_0 + 120 * b__1_0 + 30720 * b__2_0 + 7864320 * b__3_0 + 943718400 * c__0_0 - (120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 943718400 * b__0_0 + 120 * c__1_0 + 30720 * c__2_0 + 7864320 * c__3_0)) * (943718400 * a__0_0 + 120 * b__1_0 + 30720 * b__2_0 + 7864320 * b__3_0 + 943718400 * c__0_0 - (120 * a__1_0 + 30720 * a__2_0 + 7864320 * a__3_0 + 943718400 * b__0_0 + 120 * c__1_0 + 30720 * c__2_0 + 7864320 * c__3_0 + 1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/single_instructions/single_xor.txt ================================================ Instructions: 0: XOR rd_ptr = 8, rs1_ptr = 7, rs2 = 5, rs2_as = 1 APC advantage: - Main columns: 36 -> 24 (1.50x reduction) - Bus interactions: 20 -> 18 (1.11x reduction) - Constraints: 22 -> 1 (22.00x reduction) Symbolic machine using 24 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 a__0_0 a__1_0 a__2_0 a__3_0 b__0_0 b__1_0 b__2_0 b__3_0 c__0_0 c__1_0 c__2_0 c__3_0 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[4, from_state__timestamp_0 + 3] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 7, b__0_0, 
b__1_0, b__2_0, b__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 7, b__0_0, b__1_0, b__2_0, b__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 5, c__0_0, c__1_0, c__2_0, c__3_0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 8, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0 + 2] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[b__0_0, c__0_0, a__0_0, 1] mult=is_valid * 1, args=[b__1_0, c__1_0, a__1_0, 1] mult=is_valid * 1, args=[b__2_0, c__2_0, a__2_0, 1] mult=is_valid * 1, args=[b__3_0, c__3_0, a__3_0, 1] // Algebraic constraints: is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/superblocks/beq0_fallthrough.txt ================================================ Instructions: 0: BEQ 8 0 40 1 1 4: ADD rd_ptr = 9, rs1_ptr = 9, rs2 = 1, rs2_as = 0 APC 
advantage: - Main columns: 62 -> 21 (2.95x reduction) - Bus interactions: 31 -> 16 (1.94x reduction) - Constraints: 33 -> 6 (5.50x reduction) Symbolic machine using 21 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 a__0_0 a__1_0 a__2_0 a__3_0 reads_aux__0__base__prev_timestamp_1 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 writes_aux__prev_data__0_1 writes_aux__prev_data__1_1 writes_aux__prev_data__2_1 writes_aux__prev_data__3_1 a__0_1 a__1_1 a__2_1 a__3_1 free_var_64 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[8, from_state__timestamp_0 + 5] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, a__0_0, a__1_0, a__2_0, a__3_0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 9, writes_aux__prev_data__0_1, writes_aux__prev_data__1_1, writes_aux__prev_data__2_1, writes_aux__prev_data__3_1, reads_aux__0__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 9, a__0_1, a__1_1, a__2_1, a__3_1, from_state__timestamp_0 + 4] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * 
from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_1 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_1, a__1_1, 0, 0] mult=is_valid * 1, args=[a__2_1, a__3_1, 0, 0] // Algebraic constraints: (7864320 * a__0_1 - (7864320 * writes_aux__prev_data__0_1 + 7864320 * is_valid)) * (7864320 * a__0_1 - (7864320 * writes_aux__prev_data__0_1 + 7864321)) = 0 (30720 * a__0_1 + 7864320 * a__1_1 - (30720 * writes_aux__prev_data__0_1 + 7864320 * writes_aux__prev_data__1_1 + 30720 * is_valid)) * (30720 * a__0_1 + 7864320 * a__1_1 - (30720 * writes_aux__prev_data__0_1 + 7864320 * writes_aux__prev_data__1_1 + 30721)) = 0 (120 * a__0_1 + 30720 * a__1_1 + 7864320 * a__2_1 - (120 * writes_aux__prev_data__0_1 + 30720 * writes_aux__prev_data__1_1 + 7864320 * writes_aux__prev_data__2_1 + 120 * is_valid)) * (120 * a__0_1 + 30720 * a__1_1 + 7864320 * a__2_1 - (120 * writes_aux__prev_data__0_1 + 30720 * writes_aux__prev_data__1_1 + 7864320 * writes_aux__prev_data__2_1 + 121)) = 0 (943718400 * writes_aux__prev_data__0_1 + 120 * a__1_1 + 30720 * a__2_1 + 7864320 * a__3_1 + 943718400 * is_valid - (120 * writes_aux__prev_data__1_1 + 30720 * writes_aux__prev_data__2_1 + 7864320 * writes_aux__prev_data__3_1 + 943718400 * a__0_1)) * (943718400 * writes_aux__prev_data__0_1 + 120 * a__1_1 + 30720 * a__2_1 + 7864320 * a__3_1 + 943718399 - (120 * writes_aux__prev_data__1_1 + 30720 * writes_aux__prev_data__2_1 + 7864320 * writes_aux__prev_data__3_1 + 943718400 * a__0_1)) = 0 free_var_64 * (a__0_0 + a__1_0 + a__2_0 + a__3_0) - 1 * is_valid = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/superblocks/beq0_jump.txt ================================================ Instructions: 0: BEQ 8 
0 40 1 1 40: ADD rd_ptr = 9, rs1_ptr = 9, rs2 = 1, rs2_as = 0 APC advantage: - Main columns: 62 -> 16 (3.88x reduction) - Bus interactions: 31 -> 16 (1.94x reduction) - Constraints: 33 -> 5 (6.60x reduction) Symbolic machine using 16 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__1__base__prev_timestamp_0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 reads_aux__0__base__prev_timestamp_1 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 writes_aux__prev_data__0_1 writes_aux__prev_data__1_1 writes_aux__prev_data__2_1 writes_aux__prev_data__3_1 a__0_1 a__1_1 a__2_1 a__3_1 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[44, from_state__timestamp_0 + 5] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 8, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 8, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__1__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0 + 1] mult=is_valid * -1, args=[1, 9, writes_aux__prev_data__0_1, writes_aux__prev_data__1_1, writes_aux__prev_data__2_1, writes_aux__prev_data__3_1, reads_aux__0__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 9, a__0_1, a__1_1, a__2_1, a__3_1, from_state__timestamp_0 + 4] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_0 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_0 - 15360 * 
from_state__timestamp_0, 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_1 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 15360), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_1, a__1_1, 0, 0] mult=is_valid * 1, args=[a__2_1, a__3_1, 0, 0] // Algebraic constraints: (7864320 * a__0_1 - (7864320 * writes_aux__prev_data__0_1 + 7864320 * is_valid)) * (7864320 * a__0_1 - (7864320 * writes_aux__prev_data__0_1 + 7864321)) = 0 (30720 * a__0_1 + 7864320 * a__1_1 - (30720 * writes_aux__prev_data__0_1 + 7864320 * writes_aux__prev_data__1_1 + 30720 * is_valid)) * (30720 * a__0_1 + 7864320 * a__1_1 - (30720 * writes_aux__prev_data__0_1 + 7864320 * writes_aux__prev_data__1_1 + 30721)) = 0 (120 * a__0_1 + 30720 * a__1_1 + 7864320 * a__2_1 - (120 * writes_aux__prev_data__0_1 + 30720 * writes_aux__prev_data__1_1 + 7864320 * writes_aux__prev_data__2_1 + 120 * is_valid)) * (120 * a__0_1 + 30720 * a__1_1 + 7864320 * a__2_1 - (120 * writes_aux__prev_data__0_1 + 30720 * writes_aux__prev_data__1_1 + 7864320 * writes_aux__prev_data__2_1 + 121)) = 0 (943718400 * writes_aux__prev_data__0_1 + 120 * a__1_1 + 30720 * a__2_1 + 7864320 * a__3_1 + 943718400 * is_valid - (120 * writes_aux__prev_data__1_1 + 30720 * writes_aux__prev_data__2_1 + 7864320 * writes_aux__prev_data__3_1 + 943718400 * a__0_1)) * (943718400 * writes_aux__prev_data__0_1 + 120 * a__1_1 + 30720 * a__2_1 + 7864320 * a__3_1 + 943718399 - (120 * writes_aux__prev_data__1_1 + 30720 * writes_aux__prev_data__2_1 + 7864320 * writes_aux__prev_data__3_1 + 943718400 * a__0_1)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/superblocks/beq_fallthrough.txt ================================================ Instructions: 0: ADD rd_ptr = 10, rs1_ptr = 0, rs2 = 33, rs2_as = 0 4: BEQ 8 10 40 1 
1 8: ADD rd_ptr = 9, rs1_ptr = 9, rs2 = 1, rs2_as = 0 APC advantage: - Main columns: 98 -> 27 (3.63x reduction) - Bus interactions: 51 -> 20 (2.55x reduction) - Constraints: 55 -> 6 (9.17x reduction) Symbolic machine using 27 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 reads_aux__0__base__prev_timestamp_1 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 a__0_1 a__1_1 a__2_1 a__3_1 reads_aux__0__base__prev_timestamp_2 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 writes_aux__prev_data__0_2 writes_aux__prev_data__1_2 writes_aux__prev_data__2_2 writes_aux__prev_data__3_2 a__0_2 a__1_2 a__2_2 a__3_2 free_var_103 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[12, from_state__timestamp_0 + 8] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 10, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 8, a__0_1, a__1_1, a__2_1, a__3_1, reads_aux__0__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 8, a__0_1, a__1_1, a__2_1, a__3_1, from_state__timestamp_0 + 3] mult=is_valid * 1, args=[1, 10, 33, 0, 0, 0, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[1, 9, writes_aux__prev_data__0_2, writes_aux__prev_data__1_2, writes_aux__prev_data__2_2, writes_aux__prev_data__3_2, reads_aux__0__base__prev_timestamp_2] mult=is_valid * 1, args=[1, 9, a__0_2, a__1_2, a__2_2, a__3_2, from_state__timestamp_0 + 7] // Bus 3 
(VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_1 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 30720), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_2 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 61440), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_2, a__1_2, 0, 0] mult=is_valid * 1, args=[a__2_2, a__3_2, 0, 0] // Algebraic constraints: (7864320 * a__0_2 - (7864320 * writes_aux__prev_data__0_2 + 7864320 * is_valid)) * (7864320 * a__0_2 - (7864320 * writes_aux__prev_data__0_2 + 7864321)) = 0 (30720 * a__0_2 + 7864320 * a__1_2 - (30720 * writes_aux__prev_data__0_2 + 7864320 * writes_aux__prev_data__1_2 + 30720 * is_valid)) * (30720 * a__0_2 + 7864320 * a__1_2 - (30720 * writes_aux__prev_data__0_2 + 7864320 * writes_aux__prev_data__1_2 + 30721)) = 0 (120 * a__0_2 + 30720 * a__1_2 + 7864320 * a__2_2 - (120 * writes_aux__prev_data__0_2 + 30720 * writes_aux__prev_data__1_2 + 7864320 * writes_aux__prev_data__2_2 + 120 * is_valid)) * (120 * a__0_2 + 30720 * a__1_2 + 7864320 * a__2_2 - (120 * writes_aux__prev_data__0_2 + 30720 * writes_aux__prev_data__1_2 + 7864320 * 
writes_aux__prev_data__2_2 + 121)) = 0 (943718400 * writes_aux__prev_data__0_2 + 120 * a__1_2 + 30720 * a__2_2 + 7864320 * a__3_2 + 943718400 * is_valid - (120 * writes_aux__prev_data__1_2 + 30720 * writes_aux__prev_data__2_2 + 7864320 * writes_aux__prev_data__3_2 + 943718400 * a__0_2)) * (943718400 * writes_aux__prev_data__0_2 + 120 * a__1_2 + 30720 * a__2_2 + 7864320 * a__3_2 + 943718399 - (120 * writes_aux__prev_data__1_2 + 30720 * writes_aux__prev_data__2_2 + 7864320 * writes_aux__prev_data__3_2 + 943718400 * a__0_2)) = 0 free_var_103 * ((a__0_1 - 33) * (a__0_1 - 33) + a__1_1 + a__2_1 + a__3_1) - 1 * is_valid = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/superblocks/beq_jump.txt ================================================ Instructions: 0: ADD rd_ptr = 10, rs1_ptr = 0, rs2 = 33, rs2_as = 0 4: BEQ 8 10 40 1 1 44: ADD rd_ptr = 9, rs1_ptr = 9, rs2 = 1, rs2_as = 0 APC advantage: - Main columns: 98 -> 22 (4.45x reduction) - Bus interactions: 51 -> 20 (2.55x reduction) - Constraints: 55 -> 5 (11.00x reduction) Symbolic machine using 22 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 reads_aux__0__base__prev_timestamp_1 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 reads_aux__0__base__prev_timestamp_2 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 writes_aux__prev_data__0_2 writes_aux__prev_data__1_2 writes_aux__prev_data__2_2 writes_aux__prev_data__3_2 a__0_2 a__1_2 a__2_2 a__3_2 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[48, from_state__timestamp_0 + 8] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, 
reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 10, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * -1, args=[1, 8, 33, 0, 0, 0, reads_aux__0__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 8, 33, 0, 0, 0, from_state__timestamp_0 + 3] mult=is_valid * 1, args=[1, 10, 33, 0, 0, 0, from_state__timestamp_0 + 4] mult=is_valid * -1, args=[1, 9, writes_aux__prev_data__0_2, writes_aux__prev_data__1_2, writes_aux__prev_data__2_2, writes_aux__prev_data__3_2, reads_aux__0__base__prev_timestamp_2] mult=is_valid * 1, args=[1, 9, a__0_2, a__1_2, a__2_2, a__3_2, from_state__timestamp_0 + 7] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_1 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 30720), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_2 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 61440), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, args=[a__0_2, a__1_2, 0, 0] mult=is_valid * 1, 
args=[a__2_2, a__3_2, 0, 0] // Algebraic constraints: (7864320 * a__0_2 - (7864320 * writes_aux__prev_data__0_2 + 7864320 * is_valid)) * (7864320 * a__0_2 - (7864320 * writes_aux__prev_data__0_2 + 7864321)) = 0 (30720 * a__0_2 + 7864320 * a__1_2 - (30720 * writes_aux__prev_data__0_2 + 7864320 * writes_aux__prev_data__1_2 + 30720 * is_valid)) * (30720 * a__0_2 + 7864320 * a__1_2 - (30720 * writes_aux__prev_data__0_2 + 7864320 * writes_aux__prev_data__1_2 + 30721)) = 0 (120 * a__0_2 + 30720 * a__1_2 + 7864320 * a__2_2 - (120 * writes_aux__prev_data__0_2 + 30720 * writes_aux__prev_data__1_2 + 7864320 * writes_aux__prev_data__2_2 + 120 * is_valid)) * (120 * a__0_2 + 30720 * a__1_2 + 7864320 * a__2_2 - (120 * writes_aux__prev_data__0_2 + 30720 * writes_aux__prev_data__1_2 + 7864320 * writes_aux__prev_data__2_2 + 121)) = 0 (943718400 * writes_aux__prev_data__0_2 + 120 * a__1_2 + 30720 * a__2_2 + 7864320 * a__3_2 + 943718400 * is_valid - (120 * writes_aux__prev_data__1_2 + 30720 * writes_aux__prev_data__2_2 + 7864320 * writes_aux__prev_data__3_2 + 943718400 * a__0_2)) * (943718400 * writes_aux__prev_data__0_2 + 120 * a__1_2 + 30720 * a__2_2 + 7864320 * a__3_2 + 943718399 - (120 * writes_aux__prev_data__1_2 + 30720 * writes_aux__prev_data__2_2 + 7864320 * writes_aux__prev_data__3_2 + 943718400 * a__0_2)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/apc_snapshots/superblocks/many_blocks.txt ================================================ Instructions: 0: ADD rd_ptr = 10, rs1_ptr = 0, rs2 = 10, rs2_as = 0 4: BNE 10 11 100 1 1 8: ADD rd_ptr = 12, rs1_ptr = 11, rs2 = 5, rs2_as = 0 12: BEQ 8 12 60 1 1 72: ADD rd_ptr = 9, rs1_ptr = 9, rs2 = 1, rs2_as = 0 APC advantage: - Main columns: 160 -> 30 (5.33x reduction) - Bus interactions: 82 -> 28 (2.93x reduction) - Constraints: 88 -> 5 (17.60x reduction) Symbolic machine using 30 unique main columns: from_state__timestamp_0 reads_aux__0__base__prev_timestamp_0 
reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__base__prev_timestamp_0 writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 writes_aux__prev_data__0_0 writes_aux__prev_data__1_0 writes_aux__prev_data__2_0 writes_aux__prev_data__3_0 reads_aux__1__base__prev_timestamp_1 reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_1 writes_aux__base__prev_timestamp_2 writes_aux__base__timestamp_lt_aux__lower_decomp__0_2 writes_aux__prev_data__0_2 writes_aux__prev_data__1_2 writes_aux__prev_data__2_2 writes_aux__prev_data__3_2 reads_aux__0__base__prev_timestamp_3 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_3 reads_aux__0__base__prev_timestamp_4 reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_4 writes_aux__prev_data__0_4 writes_aux__prev_data__1_4 writes_aux__prev_data__2_4 writes_aux__prev_data__3_4 a__0_4 a__1_4 a__2_4 a__3_4 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=is_valid * -1, args=[0, from_state__timestamp_0] mult=is_valid * 1, args=[76, from_state__timestamp_0 + 13] // Bus 1 (MEMORY): mult=is_valid * -1, args=[1, 0, 0, 0, 0, 0, reads_aux__0__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 0, 0, 0, 0, 0, from_state__timestamp_0] mult=is_valid * -1, args=[1, 10, writes_aux__prev_data__0_0, writes_aux__prev_data__1_0, writes_aux__prev_data__2_0, writes_aux__prev_data__3_0, writes_aux__base__prev_timestamp_0] mult=is_valid * 1, args=[1, 10, 10, 0, 0, 0, from_state__timestamp_0 + 3] mult=is_valid * -1, args=[1, 11, 10, 0, 0, 0, reads_aux__1__base__prev_timestamp_1] mult=is_valid * 1, args=[1, 11, 10, 0, 0, 0, from_state__timestamp_0 + 5] mult=is_valid * -1, args=[1, 12, writes_aux__prev_data__0_2, writes_aux__prev_data__1_2, writes_aux__prev_data__2_2, writes_aux__prev_data__3_2, writes_aux__base__prev_timestamp_2] mult=is_valid * -1, args=[1, 8, 15, 0, 0, 0, reads_aux__0__base__prev_timestamp_3] mult=is_valid * 1, args=[1, 8, 15, 0, 0, 0, from_state__timestamp_0 + 8] mult=is_valid * 1, args=[1, 12, 15, 0, 0, 0, 
from_state__timestamp_0 + 9] mult=is_valid * -1, args=[1, 9, writes_aux__prev_data__0_4, writes_aux__prev_data__1_4, writes_aux__prev_data__2_4, writes_aux__prev_data__3_4, reads_aux__0__base__prev_timestamp_4] mult=is_valid * 1, args=[1, 9, a__0_4, a__1_4, a__2_4, a__3_4, from_state__timestamp_0 + 12] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_0 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_0 + 15360 - 15360 * from_state__timestamp_0, 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_0, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_0 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_0 - (15360 * from_state__timestamp_0 + 15360), 12] mult=is_valid * 1, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_1, 17] mult=is_valid * 1, args=[15360 * reads_aux__1__base__prev_timestamp_1 + 15360 * reads_aux__1__base__timestamp_lt_aux__lower_decomp__0_1 - (15360 * from_state__timestamp_0 + 46080), 12] mult=is_valid * 1, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0_2, 17] mult=is_valid * 1, args=[15360 * writes_aux__base__prev_timestamp_2 + 15360 * writes_aux__base__timestamp_lt_aux__lower_decomp__0_2 - (15360 * from_state__timestamp_0 + 92160), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_3, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_3 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_3 - (15360 * from_state__timestamp_0 + 107520), 12] mult=is_valid * 1, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_4, 17] mult=is_valid * 1, args=[15360 * reads_aux__0__base__prev_timestamp_4 + 15360 * reads_aux__0__base__timestamp_lt_aux__lower_decomp__0_4 - (15360 * from_state__timestamp_0 + 138240), 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid * 1, 
args=[a__0_4, a__1_4, 0, 0] mult=is_valid * 1, args=[a__2_4, a__3_4, 0, 0] // Algebraic constraints: (7864320 * a__0_4 - (7864320 * writes_aux__prev_data__0_4 + 7864320 * is_valid)) * (7864320 * a__0_4 - (7864320 * writes_aux__prev_data__0_4 + 7864321)) = 0 (30720 * a__0_4 + 7864320 * a__1_4 - (30720 * writes_aux__prev_data__0_4 + 7864320 * writes_aux__prev_data__1_4 + 30720 * is_valid)) * (30720 * a__0_4 + 7864320 * a__1_4 - (30720 * writes_aux__prev_data__0_4 + 7864320 * writes_aux__prev_data__1_4 + 30721)) = 0 (120 * a__0_4 + 30720 * a__1_4 + 7864320 * a__2_4 - (120 * writes_aux__prev_data__0_4 + 30720 * writes_aux__prev_data__1_4 + 7864320 * writes_aux__prev_data__2_4 + 120 * is_valid)) * (120 * a__0_4 + 30720 * a__1_4 + 7864320 * a__2_4 - (120 * writes_aux__prev_data__0_4 + 30720 * writes_aux__prev_data__1_4 + 7864320 * writes_aux__prev_data__2_4 + 121)) = 0 (943718400 * writes_aux__prev_data__0_4 + 120 * a__1_4 + 30720 * a__2_4 + 7864320 * a__3_4 + 943718400 * is_valid - (120 * writes_aux__prev_data__1_4 + 30720 * writes_aux__prev_data__2_4 + 7864320 * writes_aux__prev_data__3_4 + 943718400 * a__0_4)) * (943718400 * writes_aux__prev_data__0_4 + 120 * a__1_4 + 30720 * a__2_4 + 7864320 * a__3_4 + 943718399 - (120 * writes_aux__prev_data__1_4 + 30720 * writes_aux__prev_data__2_4 + 7864320 * writes_aux__prev_data__3_4 + 943718400 * a__0_4)) = 0 is_valid * (is_valid - 1) = 0 ================================================ FILE: openvm-riscv/tests/common/mod.rs ================================================ use openvm_instructions::instruction::Instruction; use openvm_sdk::config::SdkVmConfig; use openvm_stark_sdk::p3_baby_bear::BabyBear; use powdr_autoprecompiles::blocks::SuperBlock; use powdr_openvm::extraction_utils::OriginalVmConfig; use powdr_openvm::test_utils; use powdr_openvm_riscv::{ExtendedVmConfig, RiscvISA}; use powdr_openvm_riscv_hints_circuit::HintsExtension; use std::path::Path; pub fn original_vm_config() -> OriginalVmConfig { let sdk_vm_config = 
SdkVmConfig::builder() .system(Default::default()) .rv32i(Default::default()) .rv32m(Default::default()) .io(Default::default()) .build(); let ext_vm_config = ExtendedVmConfig { sdk: sdk_vm_config, hints: HintsExtension, }; OriginalVmConfig::new(ext_vm_config) } pub mod apc_builder_utils { use super::*; // This code is not dead, but somehow the compiler thinks so. #[allow(dead_code)] pub fn compile(superblock: SuperBlock>) -> String { let original_config = original_vm_config(); test_utils::compile_apc::(&original_config, superblock) } // This code is not dead, but somehow the compiler thinks so. #[allow(dead_code)] pub fn assert_machine_output( program: SuperBlock>, module_name: &str, test_name: &str, ) { let snapshot_dir = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests") .join("apc_snapshots"); let original_config = original_vm_config(); test_utils::assert_apc_machine_output::( &original_config, program, &snapshot_dir, module_name, test_name, ); } } ================================================ FILE: openvm-riscv/tests/keccak_apc_pre_opt.cbor ================================================ [File too large to display: 28.0 MB] ================================================ FILE: openvm-riscv/tests/machine_extraction.rs ================================================ use std::{fs, io, path::Path}; mod common; use itertools::Itertools; use powdr_openvm_riscv::DEFAULT_DEGREE_BOUND; use pretty_assertions::assert_eq; use crate::common::original_vm_config; #[test] fn extract_machine() { let original_config = original_vm_config(); let airs = original_config.airs(DEFAULT_DEGREE_BOUND).unwrap(); let bus_map = original_config.bus_map(); let rendered = airs .airs_by_name() .map(|(machine_name, air)| format!("# {machine_name}\n{}", air.render(&bus_map))) .join("\n\n\n"); let path = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests") .join("openvm_constraints.txt"); match fs::read_to_string(&path) { // Snapshot exists, compare it with the extracted constraints 
Ok(expected) => { assert_eq!(rendered, expected) } // Snapshot does not exist, create it Err(err) if err.kind() == io::ErrorKind::NotFound => { if let Some(parent) = path.parent() { fs::create_dir_all(parent).unwrap(); } fs::write(&path, &rendered).unwrap(); panic!("Created new snapshot at {path:?}. Inspect it, then rerun the tests."); } Err(_) => panic!(), } } ================================================ FILE: openvm-riscv/tests/openvm_constraints.txt ================================================ # VmAirWrapper Symbolic machine using 36 unique main columns: from_state__pc from_state__timestamp rd_ptr rs1_ptr rs2 rs2_as reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 writes_aux__base__prev_timestamp writes_aux__base__timestamp_lt_aux__lower_decomp__0 writes_aux__base__timestamp_lt_aux__lower_decomp__1 writes_aux__prev_data__0 writes_aux__prev_data__1 writes_aux__prev_data__2 writes_aux__prev_data__3 a__0 a__1 a__2 a__3 b__0 b__1 b__2 b__3 c__0 c__1 c__2 c__3 opcode_add_flag opcode_sub_flag opcode_xor_flag opcode_or_flag opcode_and_flag // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag), args=[1, rs1_ptr, b__0, b__1, b__2, b__3, reads_aux__0__base__prev_timestamp] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 0] 
mult=2013265920 * rs2_as, args=[rs2_as, rs2, c__0, c__1, c__2, c__3, reads_aux__1__base__prev_timestamp] mult=rs2_as, args=[rs2_as, rs2, c__0, c__1, c__2, c__3, from_state__timestamp + 1] mult=2013265920 * (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag), args=[1, rd_ptr, writes_aux__prev_data__0, writes_aux__prev_data__1, writes_aux__prev_data__2, writes_aux__prev_data__3, writes_aux__base__prev_timestamp] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[1, rd_ptr, a__0, a__1, a__2, a__3, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[from_state__pc, 512 + (0 + opcode_add_flag * 0 + opcode_sub_flag * 1 + opcode_xor_flag * 2 + opcode_or_flag * 3 + opcode_and_flag * 4), rd_ptr, rs1_ptr, rs2, 1, rs2_as, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=rs2_as, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=rs2_as, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[(1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__0 + (opcode_xor_flag + opcode_or_flag + 
opcode_and_flag) * b__0, (1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__0 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * c__0, opcode_xor_flag * a__0 + opcode_or_flag * (2 * a__0 - b__0 - c__0) + opcode_and_flag * (b__0 + c__0 - 2 * a__0), 1] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[(1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__1 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * b__1, (1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__1 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * c__1, opcode_xor_flag * a__1 + opcode_or_flag * (2 * a__1 - b__1 - c__1) + opcode_and_flag * (b__1 + c__1 - 2 * a__1), 1] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[(1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__2 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * b__2, (1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__2 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * c__2, opcode_xor_flag * a__2 + opcode_or_flag * (2 * a__2 - b__2 - c__2) + opcode_and_flag * (b__2 + c__2 - 2 * a__2), 1] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag, args=[(1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__3 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * b__3, (1 - (opcode_xor_flag + opcode_or_flag + opcode_and_flag)) * a__3 + (opcode_xor_flag + opcode_or_flag + opcode_and_flag) * c__3, opcode_xor_flag * a__3 + opcode_or_flag * (2 * a__3 - b__3 - c__3) + opcode_and_flag * (b__3 + c__3 - 2 * a__3), 1] mult=0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag - rs2_as, args=[c__0, c__1, 0, 0] // Algebraic constraints: opcode_add_flag * (opcode_add_flag - 1) = 0 opcode_sub_flag * (opcode_sub_flag - 1) = 0 opcode_xor_flag * (opcode_xor_flag - 1) = 0 
opcode_or_flag * (opcode_or_flag - 1) = 0 opcode_and_flag * (opcode_and_flag - 1) = 0 (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag) * (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag - 1) = 0 opcode_add_flag * (2005401601 * (b__0 + c__0 - a__0 + 0) * (2005401601 * (b__0 + c__0 - a__0 + 0) - 1)) = 0 opcode_sub_flag * (2005401601 * (a__0 + c__0 - b__0 + 0) * (2005401601 * (a__0 + c__0 - b__0 + 0) - 1)) = 0 opcode_add_flag * (2005401601 * (b__1 + c__1 - a__1 + 2005401601 * (b__0 + c__0 - a__0 + 0)) * (2005401601 * (b__1 + c__1 - a__1 + 2005401601 * (b__0 + c__0 - a__0 + 0)) - 1)) = 0 opcode_sub_flag * (2005401601 * (a__1 + c__1 - b__1 + 2005401601 * (a__0 + c__0 - b__0 + 0)) * (2005401601 * (a__1 + c__1 - b__1 + 2005401601 * (a__0 + c__0 - b__0 + 0)) - 1)) = 0 opcode_add_flag * (2005401601 * (b__2 + c__2 - a__2 + 2005401601 * (b__1 + c__1 - a__1 + 2005401601 * (b__0 + c__0 - a__0 + 0))) * (2005401601 * (b__2 + c__2 - a__2 + 2005401601 * (b__1 + c__1 - a__1 + 2005401601 * (b__0 + c__0 - a__0 + 0))) - 1)) = 0 opcode_sub_flag * (2005401601 * (a__2 + c__2 - b__2 + 2005401601 * (a__1 + c__1 - b__1 + 2005401601 * (a__0 + c__0 - b__0 + 0))) * (2005401601 * (a__2 + c__2 - b__2 + 2005401601 * (a__1 + c__1 - b__1 + 2005401601 * (a__0 + c__0 - b__0 + 0))) - 1)) = 0 opcode_add_flag * (2005401601 * (b__3 + c__3 - a__3 + 2005401601 * (b__2 + c__2 - a__2 + 2005401601 * (b__1 + c__1 - a__1 + 2005401601 * (b__0 + c__0 - a__0 + 0)))) * (2005401601 * (b__3 + c__3 - a__3 + 2005401601 * (b__2 + c__2 - a__2 + 2005401601 * (b__1 + c__1 - a__1 + 2005401601 * (b__0 + c__0 - a__0 + 0)))) - 1)) = 0 opcode_sub_flag * (2005401601 * (a__3 + c__3 - b__3 + 2005401601 * (a__2 + c__2 - b__2 + 2005401601 * (a__1 + c__1 - b__1 + 2005401601 * (a__0 + c__0 - b__0 + 0)))) * (2005401601 * (a__3 + c__3 - b__3 + 2005401601 * (a__2 + c__2 - b__2 + 2005401601 * (a__1 + c__1 - b__1 + 2005401601 * (a__0 + c__0 - b__0 + 0)))) 
- 1)) = 0 rs2_as * (rs2_as - 1) = 0 (1 - rs2_as) * (rs2 - (c__0 + c__1 * 256 + c__2 * 65536)) = 0 (1 - rs2_as) * (c__2 - c__3) = 0 (1 - rs2_as) * (c__2 * (255 - c__2)) = 0 (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 rs2_as * (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag - 1) = 0 rs2_as * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_add_flag + opcode_sub_flag + opcode_xor_flag + opcode_or_flag + opcode_and_flag) * (from_state__timestamp + 2 - writes_aux__base__prev_timestamp - 1 - (0 + writes_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + writes_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 37 unique main columns: from_state__pc from_state__timestamp rd_ptr rs1_ptr rs2 rs2_as reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 writes_aux__base__prev_timestamp writes_aux__base__timestamp_lt_aux__lower_decomp__0 writes_aux__base__timestamp_lt_aux__lower_decomp__1 writes_aux__prev_data__0 writes_aux__prev_data__1 writes_aux__prev_data__2 writes_aux__prev_data__3 b__0 b__1 b__2 b__3 c__0 c__1 c__2 c__3 cmp_result opcode_slt_flag opcode_sltu_flag b_msb_f c_msb_f diff_marker__0 diff_marker__1 diff_marker__2 diff_marker__3 diff_val // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_slt_flag + opcode_sltu_flag), 
args=[from_state__pc, from_state__timestamp] mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_slt_flag + opcode_sltu_flag), args=[1, rs1_ptr, b__0, b__1, b__2, b__3, reads_aux__0__base__prev_timestamp] mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 0] mult=2013265920 * rs2_as, args=[rs2_as, rs2, c__0, c__1, c__2, c__3, reads_aux__1__base__prev_timestamp] mult=rs2_as, args=[rs2_as, rs2, c__0, c__1, c__2, c__3, from_state__timestamp + 1] mult=2013265920 * (0 + opcode_slt_flag + opcode_sltu_flag), args=[1, rd_ptr, writes_aux__prev_data__0, writes_aux__prev_data__1, writes_aux__prev_data__2, writes_aux__prev_data__3, writes_aux__base__prev_timestamp] mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[1, rd_ptr, cmp_result, 0, 0, 0, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[from_state__pc, 0 + opcode_slt_flag * 0 + opcode_sltu_flag * 1 + 520, rd_ptr, rs1_ptr, rs2, 1, rs2_as, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=rs2_as, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=rs2_as, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=0 + opcode_slt_flag + opcode_sltu_flag, args=[b_msb_f + 128 * opcode_slt_flag, c_msb_f + 128 * opcode_slt_flag, 0, 0] mult=0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0, args=[diff_val - 1, 0, 0, 0] mult=0 + 
opcode_slt_flag + opcode_sltu_flag - rs2_as, args=[c__0, c__1, 0, 0] // Algebraic constraints: opcode_slt_flag * (opcode_slt_flag - 1) = 0 opcode_sltu_flag * (opcode_sltu_flag - 1) = 0 (0 + opcode_slt_flag + opcode_sltu_flag) * (0 + opcode_slt_flag + opcode_sltu_flag - 1) = 0 cmp_result * (cmp_result - 1) = 0 (b__3 - b_msb_f) * (256 - (b__3 - b_msb_f)) = 0 (c__3 - c_msb_f) * (256 - (c__3 - c_msb_f)) = 0 diff_marker__3 * (diff_marker__3 - 1) = 0 (1 - (0 + diff_marker__3)) * ((c_msb_f - b_msb_f) * (2 * cmp_result - 1)) = 0 diff_marker__3 * (diff_val - (c_msb_f - b_msb_f) * (2 * cmp_result - 1)) = 0 diff_marker__2 * (diff_marker__2 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2)) * ((c__2 - b__2) * (2 * cmp_result - 1)) = 0 diff_marker__2 * (diff_val - (c__2 - b__2) * (2 * cmp_result - 1)) = 0 diff_marker__1 * (diff_marker__1 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2 + diff_marker__1)) * ((c__1 - b__1) * (2 * cmp_result - 1)) = 0 diff_marker__1 * (diff_val - (c__1 - b__1) * (2 * cmp_result - 1)) = 0 diff_marker__0 * (diff_marker__0 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0)) * ((c__0 - b__0) * (2 * cmp_result - 1)) = 0 diff_marker__0 * (diff_val - (c__0 - b__0) * (2 * cmp_result - 1)) = 0 (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0) * (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0)) * cmp_result = 0 rs2_as * (rs2_as - 1) = 0 (1 - rs2_as) * (rs2 - (c__0 + c__1 * 256 + c__2 * 65536)) = 0 (1 - rs2_as) * (c__2 - c__3) = 0 (1 - rs2_as) * (c__2 * (255 - c__2)) = 0 (0 + opcode_slt_flag + opcode_sltu_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 rs2_as * (0 + opcode_slt_flag + opcode_sltu_flag - 1) = 0 rs2_as * 
(from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_slt_flag + opcode_sltu_flag) * (from_state__timestamp + 2 - writes_aux__base__prev_timestamp - 1 - (0 + writes_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + writes_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 53 unique main columns: from_state__pc from_state__timestamp rd_ptr rs1_ptr rs2 rs2_as reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 writes_aux__base__prev_timestamp writes_aux__base__timestamp_lt_aux__lower_decomp__0 writes_aux__base__timestamp_lt_aux__lower_decomp__1 writes_aux__prev_data__0 writes_aux__prev_data__1 writes_aux__prev_data__2 writes_aux__prev_data__3 a__0 a__1 a__2 a__3 b__0 b__1 b__2 b__3 c__0 c__1 c__2 c__3 opcode_sll_flag opcode_srl_flag opcode_sra_flag bit_multiplier_left bit_multiplier_right b_sign bit_shift_marker__0 bit_shift_marker__1 bit_shift_marker__2 bit_shift_marker__3 bit_shift_marker__4 bit_shift_marker__5 bit_shift_marker__6 bit_shift_marker__7 limb_shift_marker__0 limb_shift_marker__1 limb_shift_marker__2 limb_shift_marker__3 bit_shift_carry__0 bit_shift_carry__1 bit_shift_carry__2 bit_shift_carry__3 // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag), args=[1, rs1_ptr, b__0, b__1, b__2, b__3, reads_aux__0__base__prev_timestamp] 
mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 0] mult=2013265920 * rs2_as, args=[rs2_as, rs2, c__0, c__1, c__2, c__3, reads_aux__1__base__prev_timestamp] mult=rs2_as, args=[rs2_as, rs2, c__0, c__1, c__2, c__3, from_state__timestamp + 1] mult=2013265920 * (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag), args=[1, rd_ptr, writes_aux__prev_data__0, writes_aux__prev_data__1, writes_aux__prev_data__2, writes_aux__prev_data__3, writes_aux__base__prev_timestamp] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[1, rd_ptr, a__0, a__1, a__2, a__3, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[from_state__pc, 517 + (0 + opcode_sll_flag * 0 + opcode_srl_flag * 1 + opcode_sra_flag * 2), rd_ptr, rs1_ptr, rs2, 1, rs2_as, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[(c__0 - (0 + 0 * limb_shift_marker__0 + 1 * limb_shift_marker__1 + 2 * limb_shift_marker__2 + 3 * limb_shift_marker__3) * 8 - (0 + 0 * bit_shift_marker__0 + 1 * bit_shift_marker__1 + 2 * bit_shift_marker__2 + 3 * bit_shift_marker__3 + 4 * bit_shift_marker__4 + 5 * bit_shift_marker__5 + 6 * bit_shift_marker__6 + 7 * bit_shift_marker__7)) * 1950351361, 3] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[bit_shift_carry__0, 0 + 0 * bit_shift_marker__0 + 1 * bit_shift_marker__1 + 2 * bit_shift_marker__2 + 3 * bit_shift_marker__3 + 4 * bit_shift_marker__4 + 5 * bit_shift_marker__5 + 6 * bit_shift_marker__6 + 7 * bit_shift_marker__7] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[bit_shift_carry__1, 0 + 0 * bit_shift_marker__0 + 1 * bit_shift_marker__1 + 2 * bit_shift_marker__2 + 3 * bit_shift_marker__3 + 4 * bit_shift_marker__4 + 5 * bit_shift_marker__5 + 6 * bit_shift_marker__6 + 7 * bit_shift_marker__7] mult=0 + opcode_sll_flag + 
opcode_srl_flag + opcode_sra_flag, args=[bit_shift_carry__2, 0 + 0 * bit_shift_marker__0 + 1 * bit_shift_marker__1 + 2 * bit_shift_marker__2 + 3 * bit_shift_marker__3 + 4 * bit_shift_marker__4 + 5 * bit_shift_marker__5 + 6 * bit_shift_marker__6 + 7 * bit_shift_marker__7] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[bit_shift_carry__3, 0 + 0 * bit_shift_marker__0 + 1 * bit_shift_marker__1 + 2 * bit_shift_marker__2 + 3 * bit_shift_marker__3 + 4 * bit_shift_marker__4 + 5 * bit_shift_marker__5 + 6 * bit_shift_marker__6 + 7 * bit_shift_marker__7] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=rs2_as, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=rs2_as, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=opcode_sra_flag, args=[b__3, 128, b__3 + 128 - 2 * (b_sign * 128), 1] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[a__0, a__1, 0, 0] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag, args=[a__2, a__3, 0, 0] mult=0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag - rs2_as, args=[c__0, c__1, 0, 0] // Algebraic constraints: opcode_sll_flag * (opcode_sll_flag - 1) = 0 opcode_srl_flag * (opcode_srl_flag - 1) = 0 opcode_sra_flag * (opcode_sra_flag - 1) = 0 (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag) * (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag - 1) = 0 bit_shift_marker__0 * (bit_shift_marker__0 - 1) = 0 bit_shift_marker__0 * (bit_multiplier_left - 1 * 
opcode_sll_flag) = 0 bit_shift_marker__0 * (bit_multiplier_right - 1 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__1 * (bit_shift_marker__1 - 1) = 0 bit_shift_marker__1 * (bit_multiplier_left - 2 * opcode_sll_flag) = 0 bit_shift_marker__1 * (bit_multiplier_right - 2 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__2 * (bit_shift_marker__2 - 1) = 0 bit_shift_marker__2 * (bit_multiplier_left - 4 * opcode_sll_flag) = 0 bit_shift_marker__2 * (bit_multiplier_right - 4 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__3 * (bit_shift_marker__3 - 1) = 0 bit_shift_marker__3 * (bit_multiplier_left - 8 * opcode_sll_flag) = 0 bit_shift_marker__3 * (bit_multiplier_right - 8 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__4 * (bit_shift_marker__4 - 1) = 0 bit_shift_marker__4 * (bit_multiplier_left - 16 * opcode_sll_flag) = 0 bit_shift_marker__4 * (bit_multiplier_right - 16 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__5 * (bit_shift_marker__5 - 1) = 0 bit_shift_marker__5 * (bit_multiplier_left - 32 * opcode_sll_flag) = 0 bit_shift_marker__5 * (bit_multiplier_right - 32 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__6 * (bit_shift_marker__6 - 1) = 0 bit_shift_marker__6 * (bit_multiplier_left - 64 * opcode_sll_flag) = 0 bit_shift_marker__6 * (bit_multiplier_right - 64 * (opcode_srl_flag + opcode_sra_flag)) = 0 bit_shift_marker__7 * (bit_shift_marker__7 - 1) = 0 bit_shift_marker__7 * (bit_multiplier_left - 128 * opcode_sll_flag) = 0 bit_shift_marker__7 * (bit_multiplier_right - 128 * (opcode_srl_flag + opcode_sra_flag)) = 0 (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag) * (0 + bit_shift_marker__0 + bit_shift_marker__1 + bit_shift_marker__2 + bit_shift_marker__3 + bit_shift_marker__4 + bit_shift_marker__5 + bit_shift_marker__6 + bit_shift_marker__7 - 1) = 0 limb_shift_marker__0 * (limb_shift_marker__0 - 1) = 0 limb_shift_marker__0 * (a__0 * opcode_sll_flag - (0 + b__0 * bit_multiplier_left - 
256 * bit_shift_carry__0 * opcode_sll_flag)) = 0 limb_shift_marker__0 * (a__0 * bit_multiplier_right - (bit_shift_carry__1 * (opcode_srl_flag + opcode_sra_flag) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__0 - bit_shift_carry__0))) = 0 limb_shift_marker__0 * (a__1 * opcode_sll_flag - (bit_shift_carry__0 * opcode_sll_flag + b__1 * bit_multiplier_left - 256 * bit_shift_carry__1 * opcode_sll_flag)) = 0 limb_shift_marker__0 * (a__1 * bit_multiplier_right - (bit_shift_carry__2 * (opcode_srl_flag + opcode_sra_flag) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__1 - bit_shift_carry__1))) = 0 limb_shift_marker__0 * (a__2 * opcode_sll_flag - (bit_shift_carry__1 * opcode_sll_flag + b__2 * bit_multiplier_left - 256 * bit_shift_carry__2 * opcode_sll_flag)) = 0 limb_shift_marker__0 * (a__2 * bit_multiplier_right - (bit_shift_carry__3 * (opcode_srl_flag + opcode_sra_flag) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__2 - bit_shift_carry__2))) = 0 limb_shift_marker__0 * (a__3 * opcode_sll_flag - (bit_shift_carry__2 * opcode_sll_flag + b__3 * bit_multiplier_left - 256 * bit_shift_carry__3 * opcode_sll_flag)) = 0 limb_shift_marker__0 * (a__3 * bit_multiplier_right - (b_sign * (bit_multiplier_right - 1) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__3 - bit_shift_carry__3))) = 0 limb_shift_marker__1 * (limb_shift_marker__1 - 1) = 0 limb_shift_marker__1 * (a__0 * opcode_sll_flag) = 0 limb_shift_marker__1 * (a__0 * bit_multiplier_right - (bit_shift_carry__2 * (opcode_srl_flag + opcode_sra_flag) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__1 - bit_shift_carry__1))) = 0 limb_shift_marker__1 * (a__1 * opcode_sll_flag - (0 + b__0 * bit_multiplier_left - 256 * bit_shift_carry__0 * opcode_sll_flag)) = 0 limb_shift_marker__1 * (a__1 * bit_multiplier_right - (bit_shift_carry__3 * (opcode_srl_flag + opcode_sra_flag) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__2 - bit_shift_carry__2))) = 0 limb_shift_marker__1 * (a__2 * opcode_sll_flag - (bit_shift_carry__0 * 
opcode_sll_flag + b__1 * bit_multiplier_left - 256 * bit_shift_carry__1 * opcode_sll_flag)) = 0 limb_shift_marker__1 * (a__2 * bit_multiplier_right - (b_sign * (bit_multiplier_right - 1) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__3 - bit_shift_carry__3))) = 0 limb_shift_marker__1 * (a__3 * opcode_sll_flag - (bit_shift_carry__1 * opcode_sll_flag + b__2 * bit_multiplier_left - 256 * bit_shift_carry__2 * opcode_sll_flag)) = 0 limb_shift_marker__1 * (a__3 * (opcode_srl_flag + opcode_sra_flag) - b_sign * 255) = 0 limb_shift_marker__2 * (limb_shift_marker__2 - 1) = 0 limb_shift_marker__2 * (a__0 * opcode_sll_flag) = 0 limb_shift_marker__2 * (a__0 * bit_multiplier_right - (bit_shift_carry__3 * (opcode_srl_flag + opcode_sra_flag) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__2 - bit_shift_carry__2))) = 0 limb_shift_marker__2 * (a__1 * opcode_sll_flag) = 0 limb_shift_marker__2 * (a__1 * bit_multiplier_right - (b_sign * (bit_multiplier_right - 1) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__3 - bit_shift_carry__3))) = 0 limb_shift_marker__2 * (a__2 * opcode_sll_flag - (0 + b__0 * bit_multiplier_left - 256 * bit_shift_carry__0 * opcode_sll_flag)) = 0 limb_shift_marker__2 * (a__2 * (opcode_srl_flag + opcode_sra_flag) - b_sign * 255) = 0 limb_shift_marker__2 * (a__3 * opcode_sll_flag - (bit_shift_carry__0 * opcode_sll_flag + b__1 * bit_multiplier_left - 256 * bit_shift_carry__1 * opcode_sll_flag)) = 0 limb_shift_marker__2 * (a__3 * (opcode_srl_flag + opcode_sra_flag) - b_sign * 255) = 0 limb_shift_marker__3 * (limb_shift_marker__3 - 1) = 0 limb_shift_marker__3 * (a__0 * opcode_sll_flag) = 0 limb_shift_marker__3 * (a__0 * bit_multiplier_right - (b_sign * (bit_multiplier_right - 1) * 256 + (opcode_srl_flag + opcode_sra_flag) * (b__3 - bit_shift_carry__3))) = 0 limb_shift_marker__3 * (a__1 * opcode_sll_flag) = 0 limb_shift_marker__3 * (a__1 * (opcode_srl_flag + opcode_sra_flag) - b_sign * 255) = 0 limb_shift_marker__3 * (a__2 * opcode_sll_flag) = 0 
limb_shift_marker__3 * (a__2 * (opcode_srl_flag + opcode_sra_flag) - b_sign * 255) = 0 limb_shift_marker__3 * (a__3 * opcode_sll_flag - (0 + b__0 * bit_multiplier_left - 256 * bit_shift_carry__0 * opcode_sll_flag)) = 0 limb_shift_marker__3 * (a__3 * (opcode_srl_flag + opcode_sra_flag) - b_sign * 255) = 0 (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag) * (0 + limb_shift_marker__0 + limb_shift_marker__1 + limb_shift_marker__2 + limb_shift_marker__3 - 1) = 0 b_sign * (b_sign - 1) = 0 (1 - opcode_sra_flag) * b_sign = 0 rs2_as * (rs2_as - 1) = 0 (1 - rs2_as) * (rs2 - (c__0 + c__1 * 256 + c__2 * 65536)) = 0 (1 - rs2_as) * (c__2 - c__3) = 0 (1 - rs2_as) * (c__2 * (255 - c__2)) = 0 (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 rs2_as * (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag - 1) = 0 rs2_as * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_sll_flag + opcode_srl_flag + opcode_sra_flag) * (from_state__timestamp + 2 - writes_aux__base__prev_timestamp - 1 - (0 + writes_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + writes_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 26 unique main columns: from_state__pc from_state__timestamp rs1_ptr rs2_ptr reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 a__0 a__1 a__2 a__3 b__0 b__1 b__2 b__3 cmp_result imm opcode_beq_flag opcode_bne_flag 
diff_inv_marker__0 diff_inv_marker__1 diff_inv_marker__2 diff_inv_marker__3 // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_beq_flag + opcode_bne_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_beq_flag + opcode_bne_flag, args=[from_state__pc + cmp_result * imm + (1 - cmp_result) * 4, from_state__timestamp + 2] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_beq_flag + opcode_bne_flag), args=[1, rs1_ptr, a__0, a__1, a__2, a__3, reads_aux__0__base__prev_timestamp] mult=0 + opcode_beq_flag + opcode_bne_flag, args=[1, rs1_ptr, a__0, a__1, a__2, a__3, from_state__timestamp + 0] mult=2013265920 * (0 + opcode_beq_flag + opcode_bne_flag), args=[1, rs2_ptr, b__0, b__1, b__2, b__3, reads_aux__1__base__prev_timestamp] mult=0 + opcode_beq_flag + opcode_bne_flag, args=[1, rs2_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 1] // Bus 2 (PC_LOOKUP): mult=0 + opcode_beq_flag + opcode_bne_flag, args=[from_state__pc, 0 + opcode_beq_flag * 0 + opcode_bne_flag * 1 + 544, rs1_ptr, rs2_ptr, imm, 1, 1, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_beq_flag + opcode_bne_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_beq_flag + opcode_bne_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_beq_flag + opcode_bne_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_beq_flag + opcode_bne_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] // Algebraic constraints: opcode_beq_flag * (opcode_beq_flag - 1) = 0 opcode_bne_flag * (opcode_bne_flag - 1) = 0 (0 + opcode_beq_flag + opcode_bne_flag) * (0 + opcode_beq_flag + opcode_bne_flag - 1) = 0 cmp_result * (cmp_result - 1) = 0 (cmp_result * opcode_beq_flag + (1 - cmp_result) * opcode_bne_flag) * (a__0 - b__0) = 0 (cmp_result * opcode_beq_flag + (1 - cmp_result) * opcode_bne_flag) * (a__1 - b__1) = 0 (cmp_result * opcode_beq_flag + (1 - cmp_result) * opcode_bne_flag) * (a__2 - b__2) = 0 
(cmp_result * opcode_beq_flag + (1 - cmp_result) * opcode_bne_flag) * (a__3 - b__3) = 0 (0 + opcode_beq_flag + opcode_bne_flag) * (cmp_result * opcode_beq_flag + (1 - cmp_result) * opcode_bne_flag + (a__0 - b__0) * diff_inv_marker__0 + (a__1 - b__1) * diff_inv_marker__1 + (a__2 - b__2) * diff_inv_marker__2 + (a__3 - b__3) * diff_inv_marker__3 - 1) = 0 (0 + opcode_beq_flag + opcode_bne_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_beq_flag + opcode_bne_flag) * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 32 unique main columns: from_state__pc from_state__timestamp rs1_ptr rs2_ptr reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 a__0 a__1 a__2 a__3 b__0 b__1 b__2 b__3 cmp_result imm opcode_blt_flag opcode_bltu_flag opcode_bge_flag opcode_bgeu_flag a_msb_f b_msb_f cmp_lt diff_marker__0 diff_marker__1 diff_marker__2 diff_marker__3 diff_val // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[from_state__pc + cmp_result * imm + (1 - cmp_result) * 4, from_state__timestamp + 2] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag), args=[1, rs1_ptr, a__0, a__1, a__2, a__3, reads_aux__0__base__prev_timestamp] mult=0 + 
opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[1, rs1_ptr, a__0, a__1, a__2, a__3, from_state__timestamp + 0] mult=2013265920 * (0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag), args=[1, rs2_ptr, b__0, b__1, b__2, b__3, reads_aux__1__base__prev_timestamp] mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[1, rs2_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 1] // Bus 2 (PC_LOOKUP): mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[from_state__pc, 0 + opcode_blt_flag * 0 + opcode_bltu_flag * 1 + opcode_bge_flag * 2 + opcode_bgeu_flag * 3 + 549, rs1_ptr, rs2_ptr, imm, 1, 1, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag, args=[a_msb_f + 128 * (opcode_blt_flag + opcode_bge_flag), b_msb_f + 128 * (opcode_blt_flag + opcode_bge_flag), 0, 0] mult=0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0, args=[diff_val - 1, 0, 0, 0] // Algebraic constraints: opcode_blt_flag * (opcode_blt_flag - 1) = 0 opcode_bltu_flag * (opcode_bltu_flag - 1) = 0 opcode_bge_flag * (opcode_bge_flag - 1) = 0 opcode_bgeu_flag * (opcode_bgeu_flag - 1) = 0 (0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag) * (0 + opcode_blt_flag + 
opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag - 1) = 0 cmp_result * (cmp_result - 1) = 0 cmp_lt - (cmp_result * (opcode_blt_flag + opcode_bltu_flag) + (1 - cmp_result) * (opcode_bge_flag + opcode_bgeu_flag)) = 0 (a__3 - a_msb_f) * (256 - (a__3 - a_msb_f)) = 0 (b__3 - b_msb_f) * (256 - (b__3 - b_msb_f)) = 0 diff_marker__3 * (diff_marker__3 - 1) = 0 (1 - (0 + diff_marker__3)) * ((b_msb_f - a_msb_f) * (2 * cmp_lt - 1)) = 0 diff_marker__3 * (diff_val - (b_msb_f - a_msb_f) * (2 * cmp_lt - 1)) = 0 diff_marker__2 * (diff_marker__2 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2)) * ((b__2 - a__2) * (2 * cmp_lt - 1)) = 0 diff_marker__2 * (diff_val - (b__2 - a__2) * (2 * cmp_lt - 1)) = 0 diff_marker__1 * (diff_marker__1 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2 + diff_marker__1)) * ((b__1 - a__1) * (2 * cmp_lt - 1)) = 0 diff_marker__1 * (diff_val - (b__1 - a__1) * (2 * cmp_lt - 1)) = 0 diff_marker__0 * (diff_marker__0 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0)) * ((b__0 - a__0) * (2 * cmp_lt - 1)) = 0 diff_marker__0 * (diff_val - (b__0 - a__0) * (2 * cmp_lt - 1)) = 0 (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0) * (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0 - 1) = 0 (1 - (0 + diff_marker__3 + diff_marker__2 + diff_marker__1 + diff_marker__0)) * cmp_lt = 0 (0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_blt_flag + opcode_bltu_flag + opcode_bge_flag + opcode_bgeu_flag) * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 18 unique 
main columns: inner__from_state__pc inner__from_state__timestamp inner__rd_ptr inner__rd_aux_cols__base__prev_timestamp inner__rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0 inner__rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1 inner__rd_aux_cols__prev_data__0 inner__rd_aux_cols__prev_data__1 inner__rd_aux_cols__prev_data__2 inner__rd_aux_cols__prev_data__3 needs_write imm rd_data__0 rd_data__1 rd_data__2 rd_data__3 is_jal is_lui // Bus 0 (EXECUTION_BRIDGE): mult=-(is_lui + is_jal), args=[inner__from_state__pc, inner__from_state__timestamp] mult=is_lui + is_jal, args=[inner__from_state__pc + is_lui * 4 + is_jal * imm, inner__from_state__timestamp + 1] // Bus 1 (MEMORY): mult=2013265920 * needs_write, args=[1, inner__rd_ptr, inner__rd_aux_cols__prev_data__0, inner__rd_aux_cols__prev_data__1, inner__rd_aux_cols__prev_data__2, inner__rd_aux_cols__prev_data__3, inner__rd_aux_cols__base__prev_timestamp] mult=needs_write, args=[1, inner__rd_ptr, rd_data__0, rd_data__1, rd_data__2, rd_data__3, inner__from_state__timestamp] // Bus 2 (PC_LOOKUP): mult=is_lui + is_jal, args=[inner__from_state__pc, 560 + (is_lui * 1 + is_jal * 0), inner__rd_ptr, 0, imm, 1, 0, needs_write, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=needs_write, args=[inner__rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0, 17] mult=needs_write, args=[inner__rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=is_lui + is_jal, args=[rd_data__0, rd_data__1, 0, 0] mult=is_lui + is_jal, args=[rd_data__2, rd_data__3, 0, 0] mult=is_jal, args=[rd_data__3, 192, rd_data__3 + 192, 1] // Algebraic constraints: is_lui * (is_lui - 1) = 0 is_jal * (is_jal - 1) = 0 (is_lui + is_jal) * (is_lui + is_jal - 1) = 0 is_lui * rd_data__0 = 0 is_lui * (0 + rd_data__1 * 1 + rd_data__2 * 256 + rd_data__3 * 65536 - imm * 16) = 0 is_jal * (rd_data__0 + (0 + rd_data__1 * 1 + rd_data__2 * 256 + rd_data__3 * 65536) * 256 - (inner__from_state__pc + 4)) = 0 needs_write * (needs_write - 1) = 
0 (1 - (is_lui + is_jal)) * needs_write = 0 needs_write * (inner__from_state__timestamp - inner__rd_aux_cols__base__prev_timestamp - 1 - (0 + inner__rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0 * 1 + inner__rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 28 unique main columns: from_state__pc from_state__timestamp rs1_ptr rs1_aux_cols__base__prev_timestamp rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1 rd_ptr rd_aux_cols__base__prev_timestamp rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0 rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1 rd_aux_cols__prev_data__0 rd_aux_cols__prev_data__1 rd_aux_cols__prev_data__2 rd_aux_cols__prev_data__3 needs_write imm rs1_data__0 rs1_data__1 rs1_data__2 rs1_data__3 rd_data__0 rd_data__1 rd_data__2 is_valid to_pc_least_sig_bit to_pc_limbs__0 to_pc_limbs__1 imm_sign // Bus 0 (EXECUTION_BRIDGE): mult=-is_valid, args=[from_state__pc, from_state__timestamp] mult=is_valid, args=[to_pc_limbs__0 * 2 + to_pc_limbs__1 * 65536, from_state__timestamp + 2] // Bus 1 (MEMORY): mult=2013265920 * is_valid, args=[1, rs1_ptr, rs1_data__0, rs1_data__1, rs1_data__2, rs1_data__3, rs1_aux_cols__base__prev_timestamp] mult=is_valid, args=[1, rs1_ptr, rs1_data__0, rs1_data__1, rs1_data__2, rs1_data__3, from_state__timestamp + 0] mult=2013265920 * needs_write, args=[1, rd_ptr, rd_aux_cols__prev_data__0, rd_aux_cols__prev_data__1, rd_aux_cols__prev_data__2, rd_aux_cols__prev_data__3, rd_aux_cols__base__prev_timestamp] mult=needs_write, args=[1, rd_ptr, from_state__pc + 4 - (0 + rd_data__0 * 256 + rd_data__1 * 65536 + rd_data__2 * 16777216), rd_data__0, rd_data__1, rd_data__2, from_state__timestamp + 1] // Bus 2 (PC_LOOKUP): mult=is_valid, args=[from_state__pc, 565 + 0, rd_ptr, rs1_ptr, imm, 1, 0, needs_write, imm_sign] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid, args=[rd_data__1, 8] mult=is_valid, args=[rd_data__2, 
6] mult=is_valid, args=[to_pc_limbs__1, 14] mult=is_valid, args=[to_pc_limbs__0, 15] mult=is_valid, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1, 12] mult=needs_write, args=[rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0, 17] mult=needs_write, args=[rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid, args=[from_state__pc + 4 - (0 + rd_data__0 * 256 + rd_data__1 * 65536 + rd_data__2 * 16777216), rd_data__0, 0, 0] // Algebraic constraints: is_valid * (is_valid - 1) = 0 imm_sign * (imm_sign - 1) = 0 to_pc_least_sig_bit * (to_pc_least_sig_bit - 1) = 0 is_valid * ((rs1_data__0 + rs1_data__1 * 256 + imm - to_pc_limbs__0 * 2 - to_pc_least_sig_bit) * 2013235201 * ((rs1_data__0 + rs1_data__1 * 256 + imm - to_pc_limbs__0 * 2 - to_pc_least_sig_bit) * 2013235201 - 1)) = 0 is_valid * ((rs1_data__2 + rs1_data__3 * 256 + imm_sign * 65535 + (rs1_data__0 + rs1_data__1 * 256 + imm - to_pc_limbs__0 * 2 - to_pc_least_sig_bit) * 2013235201 - to_pc_limbs__1) * 2013235201 * ((rs1_data__2 + rs1_data__3 * 256 + imm_sign * 65535 + (rs1_data__0 + rs1_data__1 * 256 + imm - to_pc_limbs__0 * 2 - to_pc_least_sig_bit) * 2013235201 - to_pc_limbs__1) * 2013235201 - 1)) = 0 needs_write * (needs_write - 1) = 0 (1 - is_valid) * needs_write = 0 is_valid * (from_state__timestamp + 0 - rs1_aux_cols__base__prev_timestamp - 1 - (0 + rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0 * 1 + rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 needs_write * (from_state__timestamp + 1 - rd_aux_cols__base__prev_timestamp - 1 - (0 + rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0 * 1 + rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 36 unique main columns: from_state__pc from_state__timestamp rs1_ptr rs1_data__0 rs1_data__1 rs1_data__2 rs1_data__3 rs1_aux_cols__base__prev_timestamp 
rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1 rd_rs2_ptr read_data_aux__base__prev_timestamp read_data_aux__base__timestamp_lt_aux__lower_decomp__0 read_data_aux__base__timestamp_lt_aux__lower_decomp__1 imm imm_sign mem_ptr_limbs__0 mem_ptr_limbs__1 mem_as write_base_aux__prev_timestamp write_base_aux__timestamp_lt_aux__lower_decomp__0 write_base_aux__timestamp_lt_aux__lower_decomp__1 needs_write opcode_loadb_flag0 opcode_loadb_flag1 opcode_loadh_flag shift_most_sig_bit data_most_sig_bit shifted_read_data__0 shifted_read_data__1 shifted_read_data__2 shifted_read_data__3 prev_data__0 prev_data__1 prev_data__2 prev_data__3 // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag), args=[1, rs1_ptr, rs1_data__0, rs1_data__1, rs1_data__2, rs1_data__3, rs1_aux_cols__base__prev_timestamp] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[1, rs1_ptr, rs1_data__0, rs1_data__1, rs1_data__2, rs1_data__3, from_state__timestamp + 0] mult=2013265920 * (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag), args=[(0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * mem_as + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * 1, (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * rd_rs2_ptr - (shift_most_sig_bit * 2 + opcode_loadb_flag1), shift_most_sig_bit * shifted_read_data__2 + (1 - shift_most_sig_bit) * shifted_read_data__0, shift_most_sig_bit * shifted_read_data__3 + (1 - shift_most_sig_bit) * 
shifted_read_data__1, shift_most_sig_bit * shifted_read_data__0 + (1 - shift_most_sig_bit) * shifted_read_data__2, shift_most_sig_bit * shifted_read_data__1 + (1 - shift_most_sig_bit) * shifted_read_data__3, read_data_aux__base__prev_timestamp] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[(0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * mem_as + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * 1, (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * rd_rs2_ptr - (shift_most_sig_bit * 2 + opcode_loadb_flag1), shift_most_sig_bit * shifted_read_data__2 + (1 - shift_most_sig_bit) * shifted_read_data__0, shift_most_sig_bit * shifted_read_data__3 + (1 - shift_most_sig_bit) * shifted_read_data__1, shift_most_sig_bit * shifted_read_data__0 + (1 - shift_most_sig_bit) * shifted_read_data__2, shift_most_sig_bit * shifted_read_data__1 + (1 - shift_most_sig_bit) * shifted_read_data__3, from_state__timestamp + 1] mult=2013265920 * needs_write, args=[(0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * 1 + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * mem_as, (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * rd_rs2_ptr + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) - 0, prev_data__0, prev_data__1, prev_data__2, prev_data__3, write_base_aux__prev_timestamp] mult=needs_write, args=[(0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * 1 + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * mem_as, (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * rd_rs2_ptr + (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) - 0, 
(opcode_loadh_flag + opcode_loadb_flag0) * shifted_read_data__0 + opcode_loadb_flag1 * shifted_read_data__1, shifted_read_data__1 * opcode_loadh_flag + (opcode_loadb_flag0 + opcode_loadb_flag1) * (data_most_sig_bit * 255), data_most_sig_bit * 255, data_most_sig_bit * 255, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[from_state__pc, (opcode_loadb_flag0 + opcode_loadb_flag1) * 6 + opcode_loadh_flag * 7 + 528, rd_rs2_ptr, rs1_ptr, imm, 1, mem_as, needs_write, imm_sign] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[shifted_read_data__0 * opcode_loadb_flag0 + shifted_read_data__1 * opcode_loadb_flag1 + shifted_read_data__1 * opcode_loadh_flag - data_most_sig_bit * 128, 7] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[(mem_ptr_limbs__0 - (shift_most_sig_bit * 2 + opcode_loadb_flag1 + 0)) * 1509949441, 14] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[mem_ptr_limbs__1, 13] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__1, 12] mult=needs_write, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0, 17] mult=needs_write, args=[write_base_aux__timestamp_lt_aux__lower_decomp__1, 12] // Algebraic constraints: opcode_loadb_flag0 * (opcode_loadb_flag0 - 1) = 0 opcode_loadb_flag1 * (opcode_loadb_flag1 - 1) = 0 opcode_loadh_flag * (opcode_loadh_flag - 1) = 0 (0 + opcode_loadb_flag0 + 
opcode_loadb_flag1 + opcode_loadh_flag) * (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - 1) = 0 data_most_sig_bit * (data_most_sig_bit - 1) = 0 shift_most_sig_bit * (shift_most_sig_bit - 1) = 0 needs_write * (needs_write - 1) = 0 needs_write * (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - 1) = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - needs_write) * (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - 1) = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - needs_write) * rd_rs2_ptr = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * (from_state__timestamp + 0 - rs1_aux_cols__base__prev_timestamp - 1 - (0 + rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0 * 1 + rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * ((rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 * ((rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 - 1)) = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * (imm_sign * (imm_sign - 1)) = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * ((rs1_data__2 + rs1_data__3 * 256 + imm_sign * 65535 + (rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 - mem_ptr_limbs__1) * 2013235201 * ((rs1_data__2 + rs1_data__3 * 256 + imm_sign * 65535 + (rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 - mem_ptr_limbs__1) * 2013235201 - 1)) = 0 (mem_as - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * 2) * (mem_as - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * 2 - 1) * (mem_as - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag - (0 + opcode_loadb_flag0 + 
opcode_loadb_flag1 + opcode_loadh_flag)) * 2 - 2) = 0 (1 - (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag)) * mem_as = 0 (0 + opcode_loadb_flag0 + opcode_loadb_flag1 + opcode_loadh_flag) * (from_state__timestamp + 1 - read_data_aux__base__prev_timestamp - 1 - (0 + read_data_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + read_data_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 needs_write * (from_state__timestamp + 2 - write_base_aux__prev_timestamp - 1 - (0 + write_base_aux__timestamp_lt_aux__lower_decomp__0 * 1 + write_base_aux__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 41 unique main columns: from_state__pc from_state__timestamp rs1_ptr rs1_data__0 rs1_data__1 rs1_data__2 rs1_data__3 rs1_aux_cols__base__prev_timestamp rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0 rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1 rd_rs2_ptr read_data_aux__base__prev_timestamp read_data_aux__base__timestamp_lt_aux__lower_decomp__0 read_data_aux__base__timestamp_lt_aux__lower_decomp__1 imm imm_sign mem_ptr_limbs__0 mem_ptr_limbs__1 mem_as write_base_aux__prev_timestamp write_base_aux__timestamp_lt_aux__lower_decomp__0 write_base_aux__timestamp_lt_aux__lower_decomp__1 needs_write flags__0 flags__1 flags__2 flags__3 is_valid is_load read_data__0 read_data__1 read_data__2 read_data__3 prev_data__0 prev_data__1 prev_data__2 prev_data__3 write_data__0 write_data__1 write_data__2 write_data__3 // Bus 0 (EXECUTION_BRIDGE): mult=-is_valid, args=[from_state__pc, from_state__timestamp] mult=is_valid, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * is_valid, args=[1, rs1_ptr, rs1_data__0, rs1_data__1, rs1_data__2, rs1_data__3, rs1_aux_cols__base__prev_timestamp] mult=is_valid, args=[1, rs1_ptr, rs1_data__0, rs1_data__1, rs1_data__2, rs1_data__3, from_state__timestamp + 0] mult=2013265920 * is_valid, args=[is_load * mem_as + (1 - is_load) * 1, is_load * 
(mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) + (1 - is_load) * rd_rs2_ptr - ((0 + flags__0 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 1 + (0 + flags__2 * (flags__2 - 1) * 1006632961 + flags__1 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 2 + (0 + flags__2 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 3), read_data__0, read_data__1, read_data__2, read_data__3, read_data_aux__base__prev_timestamp] mult=is_valid, args=[is_load * mem_as + (1 - is_load) * 1, is_load * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) + (1 - is_load) * rd_rs2_ptr - ((0 + flags__0 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 1 + (0 + flags__2 * (flags__2 - 1) * 1006632961 + flags__1 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 2 + (0 + flags__2 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 3), read_data__0, read_data__1, read_data__2, read_data__3, from_state__timestamp + 1] mult=2013265920 * needs_write, args=[is_load * 1 + (1 - is_load) * mem_as, is_load * rd_rs2_ptr + (1 - is_load) * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) - ((0 + flags__1 * flags__2) * 1 + (0 + flags__0 * flags__2 + flags__1 * flags__3) * 2 + (0 + flags__2 * flags__3) * 3), prev_data__0, prev_data__1, prev_data__2, prev_data__3, write_base_aux__prev_timestamp] mult=needs_write, args=[is_load * 1 + (1 - is_load) * mem_as, is_load * rd_rs2_ptr + (1 - is_load) * (mem_ptr_limbs__0 + mem_ptr_limbs__1 * 65536) - ((0 + flags__1 * flags__2) * 1 + (0 + flags__0 * flags__2 + flags__1 * flags__3) * 2 + (0 + flags__2 * flags__3) * 3), write_data__0, write_data__1, write_data__2, write_data__3, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=is_valid, args=[from_state__pc, 528 + ((0 + flags__0 * (flags__0 - 1) * 1006632961) * 0 + (0 + flags__1 * (flags__1 - 1) * 1006632961 + flags__2 * (flags__2 - 1) * 1006632961) * 2 + (0 + flags__3 * (flags__3 - 1) * 1006632961 + flags__0 
* (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920 + flags__1 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920 + flags__2 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 1 + (0 + flags__3 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 3 + (0 + flags__0 * flags__1 + flags__0 * flags__2) * 4 + (0 + flags__0 * flags__3 + flags__1 * flags__2 + flags__1 * flags__3 + flags__2 * flags__3) * 5), rd_rs2_ptr, rs1_ptr, imm, 1, mem_as, needs_write, imm_sign] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1, 12] mult=is_valid, args=[(mem_ptr_limbs__0 - ((0 + flags__0 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 1 + (0 + flags__2 * (flags__2 - 1) * 1006632961 + flags__1 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 2 + (0 + flags__2 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * 3 + ((0 + flags__1 * flags__2) * 1 + (0 + flags__0 * flags__2 + flags__1 * flags__3) * 2 + (0 + flags__2 * flags__3) * 3))) * 1509949441, 14] mult=is_valid, args=[mem_ptr_limbs__1, 13] mult=is_valid, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[read_data_aux__base__timestamp_lt_aux__lower_decomp__1, 12] mult=needs_write, args=[write_base_aux__timestamp_lt_aux__lower_decomp__0, 17] mult=needs_write, args=[write_base_aux__timestamp_lt_aux__lower_decomp__1, 12] // Algebraic constraints: is_valid * (is_valid - 1) = 0 flags__0 * ((flags__0 - 1) * (flags__0 - 2)) = 0 flags__1 * ((flags__1 - 1) * (flags__1 - 2)) = 0 flags__2 * ((flags__2 - 1) * (flags__2 - 2)) = 0 flags__3 * ((flags__3 - 1) * (flags__3 - 2)) = 0 (0 + flags__0 + flags__1 + flags__2 + flags__3) * ((0 + flags__0 + flags__1 + flags__2 + flags__3 - 1) * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2)) = 0 (0 + 
flags__0 + flags__1 + flags__2 + flags__3 - 1) * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * is_valid = 0 is_load - (0 + flags__0 * (flags__0 - 1) * 1006632961 + flags__1 * (flags__1 - 1) * 1006632961 + flags__2 * (flags__2 - 1) * 1006632961 + flags__3 * (flags__3 - 1) * 1006632961 + flags__0 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920 + flags__1 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920 + flags__2 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) = 0 is_load * (is_valid - 1) = 0 write_data__0 - ((0 + flags__0 * (flags__0 - 1) * 1006632961 + flags__1 * (flags__1 - 1) * 1006632961 + flags__3 * (flags__3 - 1) * 1006632961) * read_data__0 + (0 + flags__0 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * read_data__1 + (0 + flags__2 * (flags__2 - 1) * 1006632961 + flags__1 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * read_data__2 + (0 + flags__2 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * read_data__3 + ((0 + flags__3 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920 + flags__0 * flags__1 + flags__0 * flags__3) * read_data__0 + (0 + flags__0 * flags__2 + flags__1 * flags__2 + flags__1 * flags__3 + flags__2 * flags__3) * prev_data__0)) = 0 write_data__1 - ((0 + flags__0 * (flags__0 - 1) * 1006632961 + flags__1 * (flags__1 - 1) * 1006632961) * read_data__1 + (0 + flags__2 * (flags__2 - 1) * 1006632961) * read_data__3 + ((0 + flags__1 * flags__2) * read_data__0 + (0 + flags__3 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920 + flags__0 * flags__1) * read_data__1 + (0 + flags__0 * flags__2 + flags__0 * flags__3 + flags__1 * flags__3 + flags__2 * flags__3) * prev_data__1)) = 0 write_data__2 - ((0 + flags__0 * (flags__0 - 1) * 1006632961) * read_data__2 + ((0 + flags__0 * flags__2 + flags__1 * flags__3) * read_data__0 + (0 + flags__3 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 
2013265920) * read_data__2 + (0 + flags__0 * flags__1 + flags__0 * flags__3 + flags__1 * flags__2 + flags__2 * flags__3) * prev_data__2)) = 0 write_data__3 - ((0 + flags__0 * (flags__0 - 1) * 1006632961) * read_data__3 + ((0 + flags__2 * flags__3) * read_data__0 + (0 + flags__0 * flags__2) * read_data__1 + (0 + flags__3 * (0 + flags__0 + flags__1 + flags__2 + flags__3 - 2) * 2013265920) * read_data__3 + (0 + flags__0 * flags__1 + flags__0 * flags__3 + flags__1 * flags__2 + flags__1 * flags__3) * prev_data__3)) = 0 needs_write * (needs_write - 1) = 0 needs_write * (is_valid - 1) = 0 (is_valid - needs_write) * (is_load - 1) = 0 (is_valid - needs_write) * rd_rs2_ptr = 0 is_valid * (from_state__timestamp + 0 - rs1_aux_cols__base__prev_timestamp - 1 - (0 + rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__0 * 1 + rs1_aux_cols__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 is_valid * ((rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 * ((rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 - 1)) = 0 is_valid * (imm_sign * (imm_sign - 1)) = 0 is_valid * ((rs1_data__2 + rs1_data__3 * 256 + imm_sign * 65535 + (rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 - mem_ptr_limbs__1) * 2013235201 * ((rs1_data__2 + rs1_data__3 * 256 + imm_sign * 65535 + (rs1_data__0 + rs1_data__1 * 256 + imm - mem_ptr_limbs__0) * 2013235201 - mem_ptr_limbs__1) * 2013235201 - 1)) = 0 (mem_as - (is_valid - is_load) * 2) * (mem_as - (is_valid - is_load) * 2 - 1) * (mem_as - (is_valid - is_load) * 2 - 2) = 0 (1 - is_valid) * mem_as = 0 is_valid * (from_state__timestamp + 1 - read_data_aux__base__prev_timestamp - 1 - (0 + read_data_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + read_data_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 needs_write * (from_state__timestamp + 2 - write_base_aux__prev_timestamp - 1 - (0 + write_base_aux__timestamp_lt_aux__lower_decomp__0 * 1 + 
write_base_aux__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 59 unique main columns: from_state__pc from_state__timestamp rd_ptr rs1_ptr rs2_ptr reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 writes_aux__base__prev_timestamp writes_aux__base__timestamp_lt_aux__lower_decomp__0 writes_aux__base__timestamp_lt_aux__lower_decomp__1 writes_aux__prev_data__0 writes_aux__prev_data__1 writes_aux__prev_data__2 writes_aux__prev_data__3 b__0 b__1 b__2 b__3 c__0 c__1 c__2 c__3 q__0 q__1 q__2 q__3 r__0 r__1 r__2 r__3 zero_divisor r_zero b_sign c_sign q_sign sign_xor c_sum_inv r_sum_inv r_prime__0 r_prime__1 r_prime__2 r_prime__3 r_inv__0 r_inv__1 r_inv__2 r_inv__3 lt_marker__0 lt_marker__1 lt_marker__2 lt_marker__3 lt_diff opcode_div_flag opcode_divu_flag opcode_rem_flag opcode_remu_flag // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag), args=[1, rs1_ptr, b__0, b__1, b__2, b__3, reads_aux__0__base__prev_timestamp] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 0] mult=2013265920 * (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag), args=[1, rs2_ptr, c__0, c__1, c__2, c__3, reads_aux__1__base__prev_timestamp] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[1, rs2_ptr, c__0, c__1, c__2, 
c__3, from_state__timestamp + 1] mult=2013265920 * (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag), args=[1, rd_ptr, writes_aux__prev_data__0, writes_aux__prev_data__1, writes_aux__prev_data__2, writes_aux__prev_data__3, writes_aux__base__prev_timestamp] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[1, rd_ptr, (opcode_div_flag + opcode_divu_flag) * q__0 + (1 - (opcode_div_flag + opcode_divu_flag)) * r__0, (opcode_div_flag + opcode_divu_flag) * q__1 + (1 - (opcode_div_flag + opcode_divu_flag)) * r__1, (opcode_div_flag + opcode_divu_flag) * q__2 + (1 - (opcode_div_flag + opcode_divu_flag)) * r__2, (opcode_div_flag + opcode_divu_flag) * q__3 + (1 - (opcode_div_flag + opcode_divu_flag)) * r__3, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[from_state__pc, 0 + opcode_div_flag * 0 + opcode_divu_flag * 1 + opcode_rem_flag * 2 + opcode_remu_flag * 3 + 596, rd_ptr, rs1_ptr, rs2_ptr, 1, 0, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 
(BITWISE_LOOKUP): mult=opcode_div_flag + opcode_rem_flag, args=[2 * (b__3 - b_sign * 128), 2 * (c__3 - c_sign * 128), 0, 0] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - (zero_divisor + r_zero), args=[lt_diff - 1, 0, 0, 0] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[q__0, (0 + (r__0 + c__0 * q__0) - b__0) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[q__1, ((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[q__2, (((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 2005401601 + (r__2 + c__0 * q__2 + c__1 * q__1 + c__2 * q__0) - b__2) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[q__3, ((((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 2005401601 + (r__2 + c__0 * q__2 + c__1 * q__1 + c__2 * q__0) - b__2) * 2005401601 + (r__3 + c__0 * q__3 + c__1 * q__2 + c__2 * q__1 + c__3 * q__0) - b__3) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[r__0, (((((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 2005401601 + (r__2 + c__0 * q__2 + c__1 * q__1 + c__2 * q__0) - b__2) * 2005401601 + (r__3 + c__0 * q__3 + c__1 * q__2 + c__2 * q__1 + c__3 * q__0) - b__3) * 2005401601 + (0 + c__1 * q__3 + c__2 * q__2 + c__3 * q__1) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[r__1, ((((((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 
2005401601 + (r__2 + c__0 * q__2 + c__1 * q__1 + c__2 * q__0) - b__2) * 2005401601 + (r__3 + c__0 * q__3 + c__1 * q__2 + c__2 * q__1 + c__3 * q__0) - b__3) * 2005401601 + (0 + c__1 * q__3 + c__2 * q__2 + c__3 * q__1) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601 + (0 + c__2 * q__3 + c__3 * q__2) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255) + c__1 * (q_sign * 255) + q__1 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[r__2, (((((((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 2005401601 + (r__2 + c__0 * q__2 + c__1 * q__1 + c__2 * q__0) - b__2) * 2005401601 + (r__3 + c__0 * q__3 + c__1 * q__2 + c__2 * q__1 + c__3 * q__0) - b__3) * 2005401601 + (0 + c__1 * q__3 + c__2 * q__2 + c__3 * q__1) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601 + (0 + c__2 * q__3 + c__3 * q__2) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255) + c__1 * (q_sign * 255) + q__1 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601 + (0 + c__3 * q__3) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255) + c__1 * (q_sign * 255) + q__1 * (c_sign * 255) + c__2 * (q_sign * 255) + q__2 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601] mult=0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag, args=[r__3, ((((((((0 + (r__0 + c__0 * q__0) - b__0) * 2005401601 + (r__1 + c__0 * q__1 + c__1 * q__0) - b__1) * 2005401601 + (r__2 + c__0 * q__2 + c__1 * q__1 + c__2 * q__0) - b__2) * 2005401601 + (r__3 + c__0 * q__3 + c__1 * q__2 + c__2 * q__1 + c__3 * q__0) - b__3) * 2005401601 + (0 + c__1 * q__3 + c__2 * q__2 + c__3 * q__1) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) 
* 2005401601 + (0 + c__2 * q__3 + c__3 * q__2) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255) + c__1 * (q_sign * 255) + q__1 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601 + (0 + c__3 * q__3) + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255) + c__1 * (q_sign * 255) + q__1 * (c_sign * 255) + c__2 * (q_sign * 255) + q__2 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601 + 0 + (0 + c__0 * (q_sign * 255) + q__0 * (c_sign * 255) + c__1 * (q_sign * 255) + q__1 * (c_sign * 255) + c__2 * (q_sign * 255) + q__2 * (c_sign * 255) + c__3 * (q_sign * 255) + q__3 * (c_sign * 255)) + (1 - r_zero) * (b_sign * 255) - b_sign * 255) * 2005401601] // Algebraic constraints: opcode_div_flag * (opcode_div_flag - 1) = 0 opcode_divu_flag * (opcode_divu_flag - 1) = 0 opcode_rem_flag * (opcode_rem_flag - 1) = 0 opcode_remu_flag * (opcode_remu_flag - 1) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag) * (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - 1) = 0 (zero_divisor + r_zero) * (zero_divisor + r_zero - 1) = 0 zero_divisor * (zero_divisor - 1) = 0 zero_divisor * c__0 = 0 zero_divisor * (q__0 - 255) = 0 zero_divisor * c__1 = 0 zero_divisor * (q__1 - 255) = 0 zero_divisor * c__2 = 0 zero_divisor * (q__2 - 255) = 0 zero_divisor * c__3 = 0 zero_divisor * (q__3 - 255) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - zero_divisor) * (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - zero_divisor - 1) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - zero_divisor) * ((0 + c__0 + c__1 + c__2 + c__3) * c_sum_inv - 1) = 0 r_zero * (r_zero - 1) = 0 r_zero * r__0 = 0 r_zero * r__1 = 0 r_zero * r__2 = 0 r_zero * r__3 = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - (zero_divisor + r_zero)) * (0 + opcode_div_flag + opcode_divu_flag + 
opcode_rem_flag + opcode_remu_flag - (zero_divisor + r_zero) - 1) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag - (zero_divisor + r_zero)) * ((0 + r__0 + r__1 + r__2 + r__3) * r_sum_inv - 1) = 0 b_sign * (b_sign - 1) = 0 c_sign * (c_sign - 1) = 0 (1 - (opcode_div_flag + opcode_rem_flag)) * b_sign = 0 (1 - (opcode_div_flag + opcode_rem_flag)) * c_sign = 0 b_sign + c_sign - 2 * b_sign * c_sign - sign_xor = 0 q_sign * (q_sign - 1) = 0 (0 + q__0 + q__1 + q__2 + q__3) * ((1 - zero_divisor) * (q_sign - sign_xor)) = 0 (q_sign - sign_xor) * ((1 - zero_divisor) * q_sign) = 0 (1 - sign_xor) * (r__0 - r_prime__0) = 0 sign_xor * (((0 + r__0 + r_prime__0) * 2005401601 - 0) * ((0 + r__0 + r_prime__0) * 2005401601 - 1)) = 0 sign_xor * ((r_prime__0 - 256) * r_inv__0 - 1) = 0 sign_xor * ((1 - (0 + r__0 + r_prime__0) * 2005401601) * r_prime__0) = 0 (1 - sign_xor) * (r__1 - r_prime__1) = 0 sign_xor * ((((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 - (0 + r__0 + r_prime__0) * 2005401601) * (((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 - 1)) = 0 sign_xor * ((r_prime__1 - 256) * r_inv__1 - 1) = 0 sign_xor * ((1 - ((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601) * r_prime__1) = 0 (1 - sign_xor) * (r__2 - r_prime__2) = 0 sign_xor * (((((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601 - ((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601) * ((((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601 - 1)) = 0 sign_xor * ((r_prime__2 - 256) * r_inv__2 - 1) = 0 sign_xor * ((1 - (((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601) * r_prime__2) = 0 (1 - sign_xor) * (r__3 - r_prime__3) = 0 sign_xor * ((((((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601 
+ r__3 + r_prime__3) * 2005401601 - (((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601) * (((((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601 + r__3 + r_prime__3) * 2005401601 - 1)) = 0 sign_xor * ((r_prime__3 - 256) * r_inv__3 - 1) = 0 sign_xor * ((1 - ((((0 + r__0 + r_prime__0) * 2005401601 + r__1 + r_prime__1) * 2005401601 + r__2 + r_prime__2) * 2005401601 + r__3 + r_prime__3) * 2005401601) * r_prime__3) = 0 lt_marker__3 * (lt_marker__3 - 1) = 0 (1 - (zero_divisor + r_zero + lt_marker__3)) * (r_prime__3 * (2 * c_sign - 1) + c__3 * (1 - 2 * c_sign)) = 0 lt_marker__3 * (lt_diff - (r_prime__3 * (2 * c_sign - 1) + c__3 * (1 - 2 * c_sign))) = 0 lt_marker__2 * (lt_marker__2 - 1) = 0 (1 - (zero_divisor + r_zero + lt_marker__3 + lt_marker__2)) * (r_prime__2 * (2 * c_sign - 1) + c__2 * (1 - 2 * c_sign)) = 0 lt_marker__2 * (lt_diff - (r_prime__2 * (2 * c_sign - 1) + c__2 * (1 - 2 * c_sign))) = 0 lt_marker__1 * (lt_marker__1 - 1) = 0 (1 - (zero_divisor + r_zero + lt_marker__3 + lt_marker__2 + lt_marker__1)) * (r_prime__1 * (2 * c_sign - 1) + c__1 * (1 - 2 * c_sign)) = 0 lt_marker__1 * (lt_diff - (r_prime__1 * (2 * c_sign - 1) + c__1 * (1 - 2 * c_sign))) = 0 lt_marker__0 * (lt_marker__0 - 1) = 0 (1 - (zero_divisor + r_zero + lt_marker__3 + lt_marker__2 + lt_marker__1 + lt_marker__0)) * (r_prime__0 * (2 * c_sign - 1) + c__0 * (1 - 2 * c_sign)) = 0 lt_marker__0 * (lt_diff - (r_prime__0 * (2 * c_sign - 1) + c__0 * (1 - 2 * c_sign))) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag) * (zero_divisor + r_zero + lt_marker__3 + lt_marker__2 + lt_marker__1 + lt_marker__0 - 1) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + 
reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag) * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_div_flag + opcode_divu_flag + opcode_rem_flag + opcode_remu_flag) * (from_state__timestamp + 2 - writes_aux__base__prev_timestamp - 1 - (0 + writes_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + writes_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 39 unique main columns: from_state__pc from_state__timestamp rd_ptr rs1_ptr rs2_ptr reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 writes_aux__base__prev_timestamp writes_aux__base__timestamp_lt_aux__lower_decomp__0 writes_aux__base__timestamp_lt_aux__lower_decomp__1 writes_aux__prev_data__0 writes_aux__prev_data__1 writes_aux__prev_data__2 writes_aux__prev_data__3 a__0 a__1 a__2 a__3 b__0 b__1 b__2 b__3 c__0 c__1 c__2 c__3 a_mul__0 a_mul__1 a_mul__2 a_mul__3 b_ext c_ext opcode_mulh_flag opcode_mulhsu_flag opcode_mulhu_flag // Bus 0 (EXECUTION_BRIDGE): mult=-(0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag), args=[from_state__pc, from_state__timestamp] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag), args=[1, rs1_ptr, b__0, b__1, b__2, b__3, reads_aux__0__base__prev_timestamp] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, 
from_state__timestamp + 0] mult=2013265920 * (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag), args=[1, rs2_ptr, c__0, c__1, c__2, c__3, reads_aux__1__base__prev_timestamp] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[1, rs2_ptr, c__0, c__1, c__2, c__3, from_state__timestamp + 1] mult=2013265920 * (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag), args=[1, rd_ptr, writes_aux__prev_data__0, writes_aux__prev_data__1, writes_aux__prev_data__2, writes_aux__prev_data__3, writes_aux__base__prev_timestamp] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[1, rd_ptr, a__0, a__1, a__2, a__3, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[from_state__pc, 593 + (0 + opcode_mulh_flag * 0 + opcode_mulhsu_flag * 1 + opcode_mulhu_flag * 2), rd_ptr, rs1_ptr, rs2_ptr, 1, 0, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=opcode_mulh_flag + opcode_mulhsu_flag, args=[2 * (b__3 - b_ext * 465814468 * 128), (opcode_mulh_flag + 1) * (c__3 - c_ext * 465814468 * 128), 0, 0] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=0 + 
opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a_mul__0, 2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0)] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a_mul__1, 2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1)] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a_mul__2, 2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a_mul__2)] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a_mul__3, 2005401601 * (2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a_mul__2) + (0 + b__0 * c__3 + b__1 * c__2 + b__2 * c__1 + b__3 * c__0) - a_mul__3)] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a__0, 2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a_mul__2) + (0 + b__0 * c__3 + b__1 * c__2 + b__2 * c__1 + b__3 * c__0) - a_mul__3) + (0 + b__1 * c__3 + b__2 * c__2 + b__3 * c__1) + (0 + b__0 * c_ext + c__0 * b_ext) - a__0)] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a__1, 2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a_mul__2) + (0 + b__0 * c__3 + b__1 * c__2 + b__2 * c__1 + b__3 * c__0) - a_mul__3) + (0 + b__1 * c__3 + b__2 * c__2 + b__3 * c__1) + (0 + b__0 * c_ext + c__0 * b_ext) - a__0) + (0 + b__2 * c__3 + b__3 * c__2) + (0 + b__0 * c_ext + c__0 * b_ext + b__1 * c_ext + c__1 * b_ext) - a__1)] mult=0 + 
opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a__2, 2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a_mul__2) + (0 + b__0 * c__3 + b__1 * c__2 + b__2 * c__1 + b__3 * c__0) - a_mul__3) + (0 + b__1 * c__3 + b__2 * c__2 + b__3 * c__1) + (0 + b__0 * c_ext + c__0 * b_ext) - a__0) + (0 + b__2 * c__3 + b__3 * c__2) + (0 + b__0 * c_ext + c__0 * b_ext + b__1 * c_ext + c__1 * b_ext) - a__1) + (0 + b__3 * c__3) + (0 + b__0 * c_ext + c__0 * b_ext + b__1 * c_ext + c__1 * b_ext + b__2 * c_ext + c__2 * b_ext) - a__2)] mult=0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag, args=[a__3, 2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a_mul__0) + (0 + b__0 * c__1 + b__1 * c__0) - a_mul__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a_mul__2) + (0 + b__0 * c__3 + b__1 * c__2 + b__2 * c__1 + b__3 * c__0) - a_mul__3) + (0 + b__1 * c__3 + b__2 * c__2 + b__3 * c__1) + (0 + b__0 * c_ext + c__0 * b_ext) - a__0) + (0 + b__2 * c__3 + b__3 * c__2) + (0 + b__0 * c_ext + c__0 * b_ext + b__1 * c_ext + c__1 * b_ext) - a__1) + (0 + b__3 * c__3) + (0 + b__0 * c_ext + c__0 * b_ext + b__1 * c_ext + c__1 * b_ext + b__2 * c_ext + c__2 * b_ext) - a__2) + 0 + (0 + b__0 * c_ext + c__0 * b_ext + b__1 * c_ext + c__1 * b_ext + b__2 * c_ext + c__2 * b_ext + b__3 * c_ext + c__3 * b_ext) - a__3)] // Algebraic constraints: opcode_mulh_flag * (opcode_mulh_flag - 1) = 0 opcode_mulhsu_flag * (opcode_mulhsu_flag - 1) = 0 opcode_mulhu_flag * (opcode_mulhu_flag - 1) = 0 (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag) * (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag - 1) = 0 b_ext * 465814468 * (b_ext * 465814468 - 1) = 0 c_ext * 465814468 * (c_ext * 465814468 - 1) = 0 opcode_mulhu_flag 
* (b_ext * 465814468) = 0 (opcode_mulhu_flag + opcode_mulhsu_flag) * (c_ext * 465814468) = 0 (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag) * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag) * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 (0 + opcode_mulh_flag + opcode_mulhsu_flag + opcode_mulhu_flag) * (from_state__timestamp + 2 - writes_aux__base__prev_timestamp - 1 - (0 + writes_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + writes_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 31 unique main columns: from_state__pc from_state__timestamp rd_ptr rs1_ptr rs2_ptr reads_aux__0__base__prev_timestamp reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 reads_aux__1__base__prev_timestamp reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 writes_aux__base__prev_timestamp writes_aux__base__timestamp_lt_aux__lower_decomp__0 writes_aux__base__timestamp_lt_aux__lower_decomp__1 writes_aux__prev_data__0 writes_aux__prev_data__1 writes_aux__prev_data__2 writes_aux__prev_data__3 a__0 a__1 a__2 a__3 b__0 b__1 b__2 b__3 c__0 c__1 c__2 c__3 is_valid // Bus 0 (EXECUTION_BRIDGE): mult=-is_valid, args=[from_state__pc, from_state__timestamp] mult=is_valid, args=[from_state__pc + 4, from_state__timestamp + 3] // Bus 1 (MEMORY): mult=2013265920 * is_valid, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, reads_aux__0__base__prev_timestamp] mult=is_valid, args=[1, rs1_ptr, b__0, b__1, b__2, b__3, from_state__timestamp + 0] mult=2013265920 * 
is_valid, args=[1, rs2_ptr, c__0, c__1, c__2, c__3, reads_aux__1__base__prev_timestamp] mult=is_valid, args=[1, rs2_ptr, c__0, c__1, c__2, c__3, from_state__timestamp + 1] mult=2013265920 * is_valid, args=[1, rd_ptr, writes_aux__prev_data__0, writes_aux__prev_data__1, writes_aux__prev_data__2, writes_aux__prev_data__3, writes_aux__base__prev_timestamp] mult=is_valid, args=[1, rd_ptr, a__0, a__1, a__2, a__3, from_state__timestamp + 2] // Bus 2 (PC_LOOKUP): mult=is_valid, args=[from_state__pc, 592 + 0, rd_ptr, rs1_ptr, rs2_ptr, 1, 0, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[reads_aux__0__base__timestamp_lt_aux__lower_decomp__1, 12] mult=is_valid, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[reads_aux__1__base__timestamp_lt_aux__lower_decomp__1, 12] mult=is_valid, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[writes_aux__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 7 (TUPLE_RANGE_CHECKER_256_2048): mult=is_valid, args=[a__0, 2005401601 * (0 + (0 + b__0 * c__0) - a__0)] mult=is_valid, args=[a__1, 2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a__0) + (0 + b__0 * c__1 + b__1 * c__0) - a__1)] mult=is_valid, args=[a__2, 2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a__0) + (0 + b__0 * c__1 + b__1 * c__0) - a__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a__2)] mult=is_valid, args=[a__3, 2005401601 * (2005401601 * (2005401601 * (2005401601 * (0 + (0 + b__0 * c__0) - a__0) + (0 + b__0 * c__1 + b__1 * c__0) - a__1) + (0 + b__0 * c__2 + b__1 * c__1 + b__2 * c__0) - a__2) + (0 + b__0 * c__3 + b__1 * c__2 + b__2 * c__1 + b__3 * c__0) - a__3)] // Algebraic constraints: is_valid * (is_valid - 1) = 0 is_valid * (from_state__timestamp + 0 - reads_aux__0__base__prev_timestamp - 1 - (0 + reads_aux__0__base__timestamp_lt_aux__lower_decomp__0 * 1 + 
reads_aux__0__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 is_valid * (from_state__timestamp + 1 - reads_aux__1__base__prev_timestamp - 1 - (0 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__0 * 1 + reads_aux__1__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 is_valid * (from_state__timestamp + 2 - writes_aux__base__prev_timestamp - 1 - (0 + writes_aux__base__timestamp_lt_aux__lower_decomp__0 * 1 + writes_aux__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 # VmAirWrapper Symbolic machine using 20 unique main columns: from_state__pc from_state__timestamp rd_ptr rd_aux_cols__base__prev_timestamp rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0 rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1 rd_aux_cols__prev_data__0 rd_aux_cols__prev_data__1 rd_aux_cols__prev_data__2 rd_aux_cols__prev_data__3 is_valid imm_limbs__0 imm_limbs__1 imm_limbs__2 pc_limbs__0 pc_limbs__1 rd_data__0 rd_data__1 rd_data__2 rd_data__3 // Bus 0 (EXECUTION_BRIDGE): mult=-is_valid, args=[from_state__pc, from_state__timestamp] mult=is_valid, args=[from_state__pc + 4, from_state__timestamp + 1] // Bus 1 (MEMORY): mult=2013265920 * is_valid, args=[1, rd_ptr, rd_aux_cols__prev_data__0, rd_aux_cols__prev_data__1, rd_aux_cols__prev_data__2, rd_aux_cols__prev_data__3, rd_aux_cols__base__prev_timestamp] mult=is_valid, args=[1, rd_ptr, rd_data__0, rd_data__1, rd_data__2, rd_data__3, from_state__timestamp] // Bus 2 (PC_LOOKUP): mult=is_valid, args=[from_state__pc, 576 + 0, rd_ptr, 0, 0 + imm_limbs__0 * 1 + imm_limbs__1 * 256 + imm_limbs__2 * 65536, 1, 0, 0, 0] // Bus 3 (VARIABLE_RANGE_CHECKER): mult=is_valid, args=[rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0, 17] mult=is_valid, args=[rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1, 12] // Bus 6 (BITWISE_LOOKUP): mult=is_valid, args=[rd_data__0, rd_data__1, 0, 0] mult=is_valid, args=[rd_data__2, rd_data__3, 0, 0] mult=is_valid, args=[imm_limbs__0, imm_limbs__1, 0, 0] mult=is_valid, args=[imm_limbs__2, 
pc_limbs__0, 0, 0] mult=is_valid, args=[pc_limbs__1, (from_state__pc - (rd_data__0 + (0 + pc_limbs__0 * 256 + pc_limbs__1 * 65536))) * 2013265801 * 4, 0, 0] // Algebraic constraints: is_valid * (is_valid - 1) = 0 is_valid * (2005401601 * (pc_limbs__0 + imm_limbs__0 - rd_data__1 + 0) * (2005401601 * (pc_limbs__0 + imm_limbs__0 - rd_data__1 + 0) - 1)) = 0 is_valid * (2005401601 * (pc_limbs__1 + imm_limbs__1 - rd_data__2 + 2005401601 * (pc_limbs__0 + imm_limbs__0 - rd_data__1 + 0)) * (2005401601 * (pc_limbs__1 + imm_limbs__1 - rd_data__2 + 2005401601 * (pc_limbs__0 + imm_limbs__0 - rd_data__1 + 0)) - 1)) = 0 is_valid * (2005401601 * ((from_state__pc - (rd_data__0 + (0 + pc_limbs__0 * 256 + pc_limbs__1 * 65536))) * 2013265801 + imm_limbs__2 - rd_data__3 + 2005401601 * (pc_limbs__1 + imm_limbs__1 - rd_data__2 + 2005401601 * (pc_limbs__0 + imm_limbs__0 - rd_data__1 + 0))) * (2005401601 * ((from_state__pc - (rd_data__0 + (0 + pc_limbs__0 * 256 + pc_limbs__1 * 65536))) * 2013265801 + imm_limbs__2 - rd_data__3 + 2005401601 * (pc_limbs__1 + imm_limbs__1 - rd_data__2 + 2005401601 * (pc_limbs__0 + imm_limbs__0 - rd_data__1 + 0))) - 1)) = 0 is_valid * (from_state__timestamp - rd_aux_cols__base__prev_timestamp - 1 - (0 + rd_aux_cols__base__timestamp_lt_aux__lower_decomp__0 * 1 + rd_aux_cols__base__timestamp_lt_aux__lower_decomp__1 * 131072)) = 0 ================================================ FILE: riscv-elf/Cargo.toml ================================================ [package] name = "powdr-riscv-elf" description = "powdr RISCV ELF utils" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [dependencies] powdr-isa-utils.workspace = true powdr-riscv-types.workspace = true powdr-syscalls.workspace = true gimli = "0.31" goblin = "0.8" itertools.workspace = true log.workspace = true raki = "0.1.4" thiserror = "1.0" tracing.workspace = true [lints] workspace = true [lib] bench = false # See 
https://github.com/bheisler/criterion.rs/issues/458 [[bin]] name = "elf-labels" path = "src/bin/elf-labels.rs" ================================================ FILE: riscv-elf/src/bin/elf-labels.rs ================================================ #![allow(clippy::print_stdout)] use goblin::elf::{ header::{EI_CLASS, ELFCLASS32, ELFCLASS64}, Elf, }; use powdr_riscv_elf::{load_elf, rv64}; use std::env; use std::fs; use std::panic; use std::path::Path; use std::process; fn main() { let args: Vec = env::args().collect(); if args.len() != 2 { eprintln!("Usage: {} ", args[0]); process::exit(1); } let elf_path = Path::new(&args[1]); if !elf_path.exists() { eprintln!("Error: File '{}' does not exist", elf_path.display()); process::exit(1); } // Read the file to check if it's 32-bit or 64-bit let file_buffer = match fs::read(elf_path) { Ok(buffer) => buffer, Err(e) => { eprintln!("Error reading file: {e}"); process::exit(1); } }; let elf = match Elf::parse(&file_buffer) { Ok(elf) => elf, Err(e) => { eprintln!("Error parsing ELF header: {e}"); process::exit(1); } }; match elf.header.e_ident[EI_CLASS] { ELFCLASS32 => { // The load_elf function panics on errors, so we catch it let result = panic::catch_unwind(|| load_elf(elf_path)); match result { Ok(program) => { println!( "RV32 ELF file analyzed successfully: {}", elf_path.display() ); println!(); print_elf_info_32(&program); } Err(_) => { eprintln!("Error loading RV32 ELF file: The file may be corrupted or not a valid RISC-V ELF"); process::exit(1); } } } ELFCLASS64 => { // The load_elf_rv64 function panics on errors, so we catch it let result = panic::catch_unwind(|| rv64::compute_jumpdests(elf_path)); match result { Ok(labels) => { println!( "RV64 ELF file analyzed successfully: {}", elf_path.display() ); println!(); print_elf_info_64(&labels); } Err(_) => { eprintln!("Error loading RV64 ELF file: The file may be corrupted or not a valid RISC-V ELF"); process::exit(1); } } } _ => { eprintln!("Unsupported ELF class"); 
process::exit(1); } } } fn print_elf_info_32(program: &powdr_riscv_elf::ElfProgram) { // Get text labels from the program let text_labels = program.text_labels(); if text_labels.is_empty() { println!("No text labels found in the ELF file."); } else { println!("Text labels found: {}", text_labels.len()); println!(); println!("{:<16}", "Address"); println!("{}", "-".repeat(16)); // Text labels are already sorted in BTreeSet for address in text_labels { println!("0x{address:08x}"); } } // Report on debug symbols let debug_info = program.debug_info(); println!(); println!("Debug information:"); // Since we can't iterate over SymbolTable directly, we'll use text_labels // and look up each address let mut symbol_count = 0; let mut function_symbols = Vec::new(); for &addr in text_labels { if let Some(name) = debug_info.symbols.try_get_one(addr) { symbol_count += 1; // Simple heuristic for functions: doesn't start with $ or contain . if !name.starts_with("$") && !name.contains(".") { function_symbols.push((addr, name)); } } } println!(" Symbols at text label addresses: {symbol_count}"); println!(" Function symbols: {}", function_symbols.len()); if !function_symbols.is_empty() { println!(); println!("Function symbols:"); println!("{:<16} {:<40}", "Address", "Symbol"); println!("{}", "-".repeat(60)); for (address, name) in function_symbols { println!("0x{address:08x} {name}"); } } // Also show notes if available if !debug_info.notes.is_empty() { println!(); println!("Debug notes:"); let mut notes: Vec<_> = debug_info.notes.iter().collect(); notes.sort_by_key(|(addr, _)| *addr); for (addr, note) in notes { println!("0x{addr:08x}: {note}"); } } } fn print_elf_info_64(labels: &rv64::Rv64Labels) { println!("Entry point: 0x{:016x}", labels.entry_point); println!("PC base: 0x{:016x}", labels.pc_base); println!(); if labels.jumpdests.is_empty() { println!("No text labels or jump destinations found."); } else { println!( "Text labels and jump destinations found: {}", 
labels.jumpdests.len() ); println!(); // Show all labels with symbols if available println!("{:<20} {:<40}", "Address", "Symbol (if available)"); println!("{}", "-".repeat(60)); for &addr in &labels.jumpdests { // Find symbol name if available let symbol = labels .symbols .iter() .find(|(sym_addr, _)| *sym_addr == addr) .map(|(_, name)| name.as_str()) .unwrap_or(""); println!("0x{addr:016x} {symbol}"); } // Summary of symbols println!(); println!("Summary:"); println!(" Total labels/jumpdests: {}", labels.jumpdests.len()); println!(" Named symbols: {}", labels.symbols.len()); println!( " Jumpdests without symbols: {}", labels.jumpdests_with_debug_info.len() ); // Show function-like symbols separately let function_symbols: Vec<_> = labels .symbols .iter() .filter(|(_, name)| !name.starts_with("$") && !name.contains(".")) .collect(); if !function_symbols.is_empty() { println!(" Function symbols: {}", function_symbols.len()); } // Show label to address map println!(); println!("=== Label to Address Map ==="); println!("{:<40} {:<20}", "Label", "Address"); println!("{}", "-".repeat(60)); let mut sorted_symbols = labels.symbols.clone(); sorted_symbols.sort_by(|a, b| a.1.cmp(&b.1)); for (addr, name) in sorted_symbols { println!("{name:<40} 0x{addr:016x}"); } // Show jumpdests that are not labels println!(); println!("=== Jump Destinations Without Symbols ==="); println!( "{:<20} {:<20} {:<40}", "Target Address", "From Address", "Instruction" ); println!("{}", "-".repeat(80)); let mut sorted_jumpdests: Vec<_> = labels.jumpdests_with_debug_info.iter().collect(); sorted_jumpdests.sort_by_key(|(addr, _)| *addr); for (target_addr, sources) in sorted_jumpdests { for source in sources { println!( "0x{:016x} 0x{:016x} {}", target_addr, source.from_addr, source.instruction ); } } println!(); println!("PC Base: 0x{:016x}", labels.pc_base); } } ================================================ FILE: riscv-elf/src/debug_info.rs ================================================ use 
std::{ borrow::Cow, collections::{BTreeMap, BTreeSet, HashMap}, path::Path, }; use gimli::{ read::AttributeValue, DebuggingInformationEntry, Dwarf, EndianSlice, LittleEndian, Operation, Unit, UnitRef, }; use goblin::elf::{ sym::{STT_FUNC, STT_OBJECT}, Elf, SectionHeader, }; use itertools::Itertools; use super::AddressMap; type Reader<'a> = EndianSlice<'a, LittleEndian>; #[derive(thiserror::Error, Debug)] pub enum Error { #[error("no debug information available")] NoDebugInfo, #[error("DIE tree traversal skipped a level")] UnexpectedLevel, #[error("failed to parse debug information: {0}")] Parsing(#[from] gimli::Error), } /// Debug information extracted from the ELF file. #[derive(Default)] pub struct DebugInfo { /// List of source files: (directory, file name). pub file_list: Vec<(String, String)>, /// Relates addresses to source locations. pub source_locations: Vec, /// Maps addresses to symbol names. pub symbols: SymbolTable, /// Human readable notes about an address pub notes: HashMap, } #[derive(Debug)] pub struct SourceLocationInfo { pub address: u32, pub file: u64, pub line: u64, pub col: u64, } impl DebugInfo { /// Extracts debug information from the ELF file, if available. pub fn new( elf: &Elf, file_buffer: &[u8], address_map: &AddressMap, is_data_addr: &dyn Fn(u32) -> bool, jump_targets: &BTreeSet, ) -> Result { let dwarf = load_dwarf_sections(elf, file_buffer)?; let mut file_list = Vec::new(); let mut source_locations = Vec::new(); let mut notes = HashMap::new(); // Read the ELF symbol table, to be joined with symbols from the DWARF. let mut symbols = read_symbol_table(elf); // Iterate over the compilation units: let mut units_iter = dwarf.units(); while let Some(unit) = units_iter.next()? { let unit = dwarf.unit(unit)?; // Shadows the Unit with a reference to itself, because it is more // convenient to work with a UnitRef. let unit = UnitRef::new(&dwarf, &unit); // Read the source locations for this compilation unit. 
let file_index_delta = read_source_locations(unit, &mut file_list, &mut source_locations)?; read_unit_symbols( &dwarf, unit, file_index_delta, is_data_addr, jump_targets, &mut symbols, &mut notes, )?; } // Filter out the source locations that are not in the text section filter_locations_in_text(&mut source_locations, address_map); // Deduplicate the symbols dedup_names(&mut symbols); // Index by address, not by name. let symbols = SymbolTable( symbols .into_iter() .map(|(name, address)| (address, name)) .into_group_map() .into_iter() .collect(), ); Ok(DebugInfo { file_list, source_locations, symbols, notes, }) } } /// Reads the source locations for a compilation unit. fn read_source_locations( unit: UnitRef, file_list: &mut Vec<(String, String)>, source_locations: &mut Vec, ) -> Result { // Traverse all the line locations for the compilation unit. let base_dir = Path::new( unit.comp_dir .map(|s| s.to_string()) .transpose()? .unwrap_or(""), ); let file_idx_delta = file_list.len() as u64; if let Some(line_program) = unit.line_program.clone() { // Get the source file listing for file_entry in line_program.header().file_names() { let directory = file_entry .directory(line_program.header()) .map(|attr| as_str(unit, attr)) .transpose()? .unwrap_or(""); // This unwrap can not panic because both base_dir and // directory have been validated as UTF-8 strings. let directory = base_dir .join(directory) .into_os_string() .into_string() .unwrap(); let path = as_str(unit, file_entry.path_name())?; file_list.push((directory, path.to_owned())); } // Get the locations indexed by address let mut rows = line_program.rows(); while let Some((_, row)) = rows.next_row()? { // End markers point to the address after the end, so we skip them. 
if row.prologue_end() || row.end_sequence() { continue; } source_locations.push(SourceLocationInfo { address: row.address() as u32, file: row.file_index() + file_idx_delta, line: match row.line() { None => 0, Some(v) => v.get(), }, col: match row.column() { gimli::ColumnType::LeftEdge => 0, gimli::ColumnType::Column(v) => v.get(), }, }) } } Ok(file_idx_delta) } /// Traverse the tree in which the information about the compilation /// unit is stored and extract function and variable names. fn read_unit_symbols( dwarf: &Dwarf, unit: UnitRef, file_idx_delta: u64, is_data_addr: &dyn Fn(u32) -> bool, jump_targets: &BTreeSet, symbols: &mut Vec<(String, u32)>, notes: &mut HashMap, ) -> Result<(), Error> { // To simplify the algorithm, we start the name stack with a placeholder value. let mut full_name = vec![None]; let mut entries = unit.entries(); while let Some((level_delta, entry)) = entries.next_dfs()? { // Get the entry name as a human readable string (this is used in a comment) let name = find_attr(entry, gimli::DW_AT_name) .map(|name| unit.attr_string(name).map(|s| s.to_string_lossy())) .transpose()?; match level_delta { delta if delta > 1 => return Err(Error::UnexpectedLevel), 1 => (), _ => { full_name.truncate((full_name.len() as isize + level_delta - 1) as usize); } } full_name.push(name); match entry.tag() { // This is the entry for a function or method. gimli::DW_TAG_subprogram => { let attr = find_attr(entry, gimli::DW_AT_linkage_name); let Some(linkage_name) = attr.map(|ln| unit.attr_string(ln)).transpose()? else { // This function has no linkage name in DWARF, so it // must be in ELFs symbol table. continue; }; let start_addresses = get_function_start(dwarf, &unit, entry)?; let name = linkage_name.to_string()?; for address in start_addresses { if jump_targets.contains(&address) { symbols.push((name.to_owned(), address)); } } } // This is the entry for a variable. gimli::DW_TAG_variable => { let Some(address) = get_static_var_address(&unit, entry)? 
else { continue; }; if !is_data_addr(address) { continue; } if full_name.last().is_some() { // The human readable name of the variable is available, // so we assemble a pretty note to go into the comment. let mut file_line = None; if let Some(AttributeValue::FileIndex(file_idx)) = find_attr(entry, gimli::DW_AT_decl_file) { if let Some(AttributeValue::Udata(line)) = find_attr(entry, gimli::DW_AT_decl_line) { file_line = Some((file_idx + file_idx_delta, line)); } } let value = format!( "{}{}", full_name .iter() .map(|s| match s { Some(s) => s, None => &Cow::Borrowed("?"), }) .join("::"), if let Some((file, line)) = file_line { format!(" at file {file} line {line}") } else { String::new() } ); notes.insert(address, value); } // The variable symbol name is only used as a fallback // in case there is no pretty note. if let Some(linkage_name) = find_attr(entry, gimli::DW_AT_linkage_name) .map(|ln| unit.attr_string(ln)) .transpose()? { symbols.push((linkage_name.to_string()?.to_owned(), address)); } } _ => {} }; } Ok(()) } fn load_dwarf_sections<'a>(elf: &Elf, file_buffer: &'a [u8]) -> Result>, Error> { // Index the sections by their names: let debug_sections: HashMap<&str, &SectionHeader> = elf .section_headers .iter() .filter_map(|shdr| { elf.shdr_strtab .get_at(shdr.sh_name) .map(|name| (name, shdr)) }) .collect(); if debug_sections.is_empty() { return Err(Error::NoDebugInfo); } // Load the DWARF sections: Ok(gimli::Dwarf::load(move |section| { Ok::<_, ()>(Reader::new( debug_sections .get(section.name()) .map(|shdr| { &file_buffer[shdr.sh_offset as usize..(shdr.sh_offset + shdr.sh_size) as usize] }) .unwrap_or(&[]), Default::default(), )) }) .unwrap()) } /// This function linear searches for an attribute of an entry. /// /// My first idea was to iterate over the attribute list once, matching for all /// attributes I was interested in. 
But then I figured out this operation is /// N*M, where N is the number of attributes in the list and M is the number of /// attributes I am interested in. So doing the inverse is easier and has the /// same complexity. Since it is hard to tell in practice which one is faster, I /// went with the easier approach. fn find_attr<'a>( entry: &DebuggingInformationEntry>, attr_type: gimli::DwAt, ) -> Option>> { let mut attrs = entry.attrs(); while let Some(attr) = attrs.next().unwrap() { if attr.name() == attr_type { return Some(attr.value()); } } None } fn as_str<'a>( unit: UnitRef>, attr: AttributeValue>, ) -> Result<&'a str, gimli::Error> { unit.attr_string(attr)?.to_string() } fn get_static_var_address( unit: &Unit, entry: &DebuggingInformationEntry, ) -> Result, gimli::Error> { let Some(attr) = find_attr(entry, gimli::DW_AT_location) else { // No location available return Ok(None); }; let AttributeValue::Exprloc(address) = attr else { // Not an static variable return Ok(None); }; // Do the magic to find the variable address let mut ops = address.operations(unit.encoding()); let first_op = ops.next()?; let second_op = ops.next()?; let (Some(Operation::Address { address }), None) = (first_op, second_op) else { // The address is not a constant return Ok(None); }; Ok(Some(address as u32)) } fn get_function_start( dwarf: &Dwarf, unit: &Unit, entry: &DebuggingInformationEntry, ) -> Result, gimli::Error> { let mut ret = Vec::new(); if let Some(low_pc) = find_attr(entry, gimli::DW_AT_low_pc) .map(|val| dwarf.attr_address(unit, val)) .transpose()? .flatten() { ret.push(low_pc as u32); } if let Some(ranges) = find_attr(entry, gimli::DW_AT_ranges) .map(|val| dwarf.attr_ranges_offset(unit, val)) .transpose()? .flatten() { let mut iter = dwarf.ranges(unit, ranges)?; while let Some(range) = iter.next()? { ret.push(range.begin as u32); } } Ok(ret) } /// Filter out source locations that are not in a text section. 
fn filter_locations_in_text(locations: &mut Vec<SourceLocationInfo>, address_map: &AddressMap) {
    locations.sort_unstable_by_key(|loc| loc.address);
    let mut done_idx = 0;
    for (&start_addr, &header) in address_map.0.iter() {
        // Remove all entries that are in between done and the start address.
        let start_idx = find_first_idx(&locations[done_idx..], start_addr) + done_idx;
        locations.drain(done_idx..start_idx);

        // The end address is one past the last byte of the section.
        let end_addr = start_addr + header.p_memsz as u32;
        done_idx += find_first_idx(&locations[done_idx..], end_addr);
    }
    // NOTE(review): entries past the end of the *last* section are kept;
    // confirm this is intended.
}

/// Returns the index of the first entry with `address >= addr` in a slice
/// sorted by address (i.e. the insertion point for `addr`).
fn find_first_idx(slice: &[SourceLocationInfo], addr: u32) -> usize {
    // Equivalent to binary-searching and then walking back to the first
    // duplicate: `partition_point` directly yields the first index at which
    // the predicate is false.
    slice.partition_point(|loc| loc.address < addr)
}

/// Index the symbols by their addresses.
#[derive(Default)]
pub struct SymbolTable(BTreeMap<u32, Vec<String>>);

impl SymbolTable {
    /// Builds the table from the ELF symbol table alone (no DWARF data).
    pub fn new(elf: &Elf) -> SymbolTable {
        let mut symbols = read_symbol_table(elf);
        dedup_names(&mut symbols);
        SymbolTable(
            symbols
                .into_iter()
                // `name` is already an owned String, so it can be moved
                // directly (mirrors the same pattern in `DebugInfo::new`).
                .map(|(name, addr)| (addr, name))
                .into_group_map()
                .into_iter()
                .collect(),
        )
    }

    /// Fallback label derived from the address value.
    fn default_label(addr: u32) -> Cow<'static, str> {
        Cow::Owned(format!("__.L{addr:08x}"))
    }

    /// Get a symbol, if the address has one.
    pub fn try_get_one(&self, addr: u32) -> Option<&str> {
        self.0
            .get(&addr)
            .and_then(|v| v.first().map(|s| s.as_str()))
    }

    /// Get a symbol, or a default label formed from the address value.
    pub fn get_one(&self, addr: u32) -> Cow<'_, str> {
        match self.try_get_one(addr) {
            Some(s) => Cow::Borrowed(s),
            None => Self::default_label(addr),
        }
    }

    /// Get all symbols, or a default label formed from the address value.
pub fn get_all(&self, addr: u32) -> impl Iterator> { static EMPTY: Vec = Vec::new(); let elems = self.0.get(&addr).unwrap_or(&EMPTY); let default = if elems.is_empty() { Some(Self::default_label(addr)) } else { None }; elems .iter() .map(|s| Cow::Borrowed(s.as_str())) .chain(default) } /// Returns a reference to the raw symbol table, mapping addresses to symbol names. pub fn table(&self) -> &BTreeMap> { &self.0 } /// Returns a symbol at the address or at the first address before this one that has a symbol. /// Also returns the offset of the provided address relative to that symbol. pub fn try_get_one_or_preceding(&self, addr: u64) -> Option<(&str, u32)> { let addr = u32::try_from(addr).unwrap(); self.0 .range(..=addr) .last() .and_then(|(a, v)| v.first().map(|s| (s.as_str(), addr - a))) } /// Return the inner table pub fn into_table(self) -> BTreeMap> { self.0 } pub fn from_table(table: BTreeMap>) -> Self { Self(table) } } fn read_symbol_table(elf: &Elf) -> Vec<(String, u32)> { elf.syms .iter() .filter_map(|sym| { // We only care about global symbols that have string names, and are // either functions or variables. if sym.st_name != 0 && (sym.st_type() == STT_OBJECT || sym.st_type() == STT_FUNC) { Some((elf.strtab[sym.st_name].to_owned(), sym.st_value as u32)) } else { None } }) .collect() } /// Deduplicates by removing identical entries and appending the address to /// repeated names. The vector ends up sorted. fn dedup_names(symbols: &mut Vec<(String, u32)>) { while dedup_names_pass(symbols) {} } /// Deduplicates the names of the symbols by appending one level of address to /// the name. /// /// Returns `true` if the names were deduplicated. fn dedup_names_pass(symbols: &mut Vec<(String, u32)>) -> bool { symbols.sort_unstable(); symbols.dedup(); let mut deduplicated = false; let mut iter = symbols.iter_mut(); // The first different name defines a group, which ends on the next // different name. 
The whole group is deduplicated if it contains more than // one element. let mut next_group = iter.next().map(|(name, address)| (name, *address)); while let Some((group_name, group_address)) = next_group { let mut group_deduplicated = false; next_group = None; // Find duplicates and update names in the group for (name, address) in &mut iter { if name == group_name { group_deduplicated = true; deduplicated = true; *name = format!("{name}_{address:08x}"); } else { next_group = Some((name, *address)); break; } } // If there were duplicates in the group, update the group leader, too. if group_deduplicated { *group_name = format!("{group_name}_{group_address:08x}"); } } deduplicated } #[cfg(test)] mod tests { #[test] fn dedup_names() { let mut symbols = vec![ ("baz".to_string(), 0x8000), ("bar".to_string(), 0x3000), ("foo".to_string(), 0x1000), ("bar".to_string(), 0x5000), ("foo".to_string(), 0x2000), ("baz".to_string(), 0x7000), ("baz".to_string(), 0x9000), ("doo".to_string(), 0x0042), ("baz".to_string(), 0xa000), ("baz".to_string(), 0x6000), ("bar".to_string(), 0x4000), ]; super::dedup_names(&mut symbols); let expected = vec![ ("bar_00003000".to_string(), 0x3000), ("bar_00004000".to_string(), 0x4000), ("bar_00005000".to_string(), 0x5000), ("baz_00006000".to_string(), 0x6000), ("baz_00007000".to_string(), 0x7000), ("baz_00008000".to_string(), 0x8000), ("baz_00009000".to_string(), 0x9000), ("baz_0000a000".to_string(), 0xa000), ("doo".to_string(), 0x0042), ("foo_00001000".to_string(), 0x1000), ("foo_00002000".to_string(), 0x2000), ]; assert_eq!(symbols, expected); let mut symbols = vec![ ("john".to_string(), 0x42), ("john".to_string(), 0x87), ("john".to_string(), 0x1aa), ("john_000001aa".to_string(), 0x1aa), ("john_00000042".to_string(), 0x103), ("john_00000087".to_string(), 0x103), ]; super::dedup_names(&mut symbols); let expected = vec![ ("john_00000042_00000042".to_string(), 0x42), ("john_00000042_00000103".to_string(), 0x103), ("john_00000087_00000087".to_string(), 
0x87), ("john_00000087_00000103".to_string(), 0x103), ("john_000001aa".to_string(), 0x1aa), ]; assert_eq!(symbols, expected); } } ================================================ FILE: riscv-elf/src/lib.rs ================================================ use std::{ cell::Cell, cmp::Ordering, collections::{btree_map::Entry, BTreeMap, BTreeSet}, fs, path::Path, }; use goblin::elf::{ header::{EI_CLASS, EI_DATA, ELFCLASS32, ELFDATA2LSB, EM_RISCV, ET_DYN}, program_header::PT_LOAD, reloc::{R_RISCV_32, R_RISCV_HI20, R_RISCV_RELATIVE}, Elf, ProgramHeader, }; use itertools::{Either, Itertools}; use powdr_isa_utils::SingleDataValue; use powdr_syscalls::Syscall; use raki::{ decode::Decode, instruction::{Extensions, Instruction as Ins, OpcodeKind as Op}, Isa, }; use powdr_riscv_types::{ self, InstructionArgs, MemEntry, Register, RiscVProgram, SourceFileInfo, Statement, }; pub mod debug_info; pub mod rv64; use self::debug_info::{DebugInfo, SymbolTable}; /// The program header type (p_type) for Powdr prover data segments. pub const PT_POWDR_PROVER_DATA: u32 = 0x600000da; pub struct ElfProgram { dbg: DebugInfo, data_map: BTreeMap, text_labels: BTreeSet, instructions: Vec, prover_data_bounds: (u32, u32), entry_point: u32, } pub fn load_elf(file_name: &Path) -> ElfProgram { log::info!("Loading ELF file: {}", file_name.display()); let file_buffer = fs::read(file_name).unwrap(); load_elf_from_buffer(&file_buffer) } pub fn load_elf_from_buffer(file_buffer: &[u8]) -> ElfProgram { let elf = Elf::parse(file_buffer).unwrap(); // Assert the file is 32 bits. assert_eq!( elf.header.e_ident[EI_CLASS], ELFCLASS32, "Only 32-bit ELF files are supported!" ); // Assert the file is little-endian. assert_eq!( elf.header.e_ident[EI_DATA], ELFDATA2LSB, "Only little-endian ELF files are supported!" ); // Assert the file contains RISC-V code. assert_eq!( elf.header.e_machine, EM_RISCV, "Only RISC-V ELF files are supported!" 
); // Assert this is either a PIE file, or that we have the relocation symbols // available. This is needed because we have to lift all the references to // code addresses into labels. assert!( elf.header.e_type == ET_DYN || !elf.shdr_relocs.is_empty(), "We can only translate PIE ELFs (-pie) or ELFs with relocation symbols (--emit-relocs)." ); // Map of addresses into memory sections, so we can know what address belong // in what section. let mut address_map = AddressMap(BTreeMap::new()); let mut prover_data_bounds = None; for ph in elf.program_headers.iter() { match ph.p_type { PT_LOAD => { address_map.0.insert(ph.p_vaddr as u32, ph); } PT_POWDR_PROVER_DATA => { assert_eq!( prover_data_bounds, None, "Only one prover data segment is supported!" ); prover_data_bounds = Some((ph.p_vaddr as u32, ph.p_vaddr as u32 + ph.p_memsz as u32)); } _ => {} } } // If no prover data segment was provided, make it empty. let prover_data_bounds = prover_data_bounds.unwrap_or((0, 0)); // Set of R_RISCV_HI20 relocations, needed in non-PIE code to identify // loading of absolute addresses to text. let text_rellocs_set: BTreeSet = elf .shdr_relocs .iter() .flat_map(|(_, r)| r.iter()) .filter(|r| r.r_type == R_RISCV_HI20) .map(|r| r.r_offset as u32) .collect(); // Keep a list of referenced text addresses, so we can generate the labels. let mut referenced_text_addrs = BTreeSet::from([elf.entry as u32]); // Find the text addresses referenced from text sections and load the data sections. let mut data_map = BTreeMap::new(); for (&addr, &p) in address_map.0.iter() { let section_data = &file_buffer[p.p_offset as usize..(p.p_offset + p.p_filesz) as usize]; if p.is_executable() { search_text_addrs( addr, section_data, &address_map, &text_rellocs_set, &mut referenced_text_addrs, ); } else { load_data_section(addr, section_data, &mut data_map); } } // Lift all the references to text addresses in data sections, and add them // to the set. How to do this depends on whether the file is PIE or not. 
(if elf.header.e_type == ET_DYN { pie_relocate_data_sections } else { static_relocate_data_sections })( &elf, &address_map, &mut data_map, &mut referenced_text_addrs, ); // Load all the text sections. let mut lifted_text_sections = Vec::new(); for (&addr, &p) in address_map.0.iter().filter(|(_, p)| p.is_executable()) { let section_data = &file_buffer[p.p_offset as usize..(p.p_offset + p.p_filesz) as usize]; let insns = lift_instructions( addr, section_data, &address_map, &text_rellocs_set, &referenced_text_addrs, ); if !insns.is_empty() { lifted_text_sections.push(insns); } } // Sort text sections by address and flatten them. lifted_text_sections.sort_by_key(|insns| insns[0].loc.address); let lifted_text_sections = lifted_text_sections .into_iter() .flatten() .collect::>(); // Try loading the debug information. let debug_info = match debug_info::DebugInfo::new( &elf, file_buffer, &address_map, &|key| data_map.contains_key(&key), &referenced_text_addrs, ) { Ok(debug_info) => { log::info!("Debug information loaded successfully."); debug_info } Err(err) => { match err { debug_info::Error::NoDebugInfo => { log::info!("No DWARF debug information found.") } err => { log::warn!("Error reading DWARF debug information: {err}") } } log::info!("Falling back to using ELF symbol table."); DebugInfo { symbols: SymbolTable::new(&elf), ..Default::default() } } }; ElfProgram { dbg: debug_info, data_map, text_labels: referenced_text_addrs, instructions: lifted_text_sections, entry_point: elf.entry as u32, prover_data_bounds, } } fn pie_relocate_data_sections( elf: &Elf, address_map: &AddressMap, data_map: &mut BTreeMap, referenced_text_addrs: &mut BTreeSet, ) { // In PIE files, we can read the dynamic relocation table. 
for r in elf.dynrelas.iter() { let addr = r.r_offset as u32; if !address_map.is_in_data_section(addr) { unimplemented!("We assumed all dynamic relocations were data relocations!"); } // We only support the R_RISCV_RELATIVE relocation type: assert_eq!(r.r_type, R_RISCV_RELATIVE, "Unsupported relocation type!"); let data_value = r.r_addend.unwrap() as u32; if address_map.is_in_text_section(data_value) { data_map.insert(addr, Data::TextLabel(data_value)); // We also need to add the referenced address to the list of text // addresses, so we can generate the label. referenced_text_addrs.insert(data_value); } else { data_map.insert(addr, Data::Value(data_value)); } } assert_eq!(elf.dynrels.len(), 0, "Unsupported relocation type!"); } fn static_relocate_data_sections( elf: &Elf, address_map: &AddressMap, data_map: &mut BTreeMap, referenced_text_addrs: &mut BTreeSet, ) { // In non-PIE files, we need to use the linking relocation table. for r in elf.shdr_relocs.iter().flat_map(|(_, relocs)| relocs.iter()) { let addr = r.r_offset as u32; if !address_map.is_in_data_section(addr) { // Relocation of the text section has already been handled in instruction lifting. continue; } // We only support the R_RISCV_32 relocation type for the data section: assert_eq!(r.r_type, R_RISCV_32, "Unsupported relocation type!"); let Entry::Occupied(mut entry) = data_map.entry(r.r_offset as u32) else { panic!("Unexpected 0 in relocated data entry!"); }; let Data::Value(original_addr) = *entry.get() else { panic!("Related entry already replaced with a label!"); }; if address_map.is_in_text_section(original_addr) { entry.insert(Data::TextLabel(original_addr)); // We also need to add the referenced address to the list of text // addresses, so we can generate the label. 
referenced_text_addrs.insert(original_addr); } } } impl ElfProgram { pub fn debug_info(&self) -> &DebugInfo { &self.dbg } pub fn text_labels(&self) -> &BTreeSet { &self.text_labels } } impl RiscVProgram for ElfProgram { fn take_source_files_info(&mut self) -> impl Iterator> { self.dbg .file_list .iter() .enumerate() .map(|(id, (dir, file))| SourceFileInfo { // +1 because files are indexed from 1 id: id as u32 + 1, file, dir, }) } fn take_initial_mem(&mut self) -> impl Iterator { self.data_map.iter().map(|(addr, data)| { let value = match data { Data::TextLabel(label) => { SingleDataValue::LabelReference(self.dbg.symbols.get_one(*label).into()) } Data::Value(value) => SingleDataValue::Value(*value), }; let label = self .dbg .notes .get(addr) .map(|note| note.as_str()) .or_else(|| self.dbg.symbols.try_get_one(*addr)) .map(|s| s.to_string()); MemEntry { label, addr: *addr, value, } }) } fn take_executable_statements( &mut self, ) -> impl Iterator, impl InstructionArgs>> { // In the output, the precedence is labels, locations, and then instructions. // We merge the 3 iterators with this operations: merge(labels, merge(locs, instructions)), where each is sorted by address. // First the inner merge: locs and instructions. let locs = self.dbg.source_locations.iter(); let instructions = self.instructions.iter(); let locs_and_instructions = locs .map(|loc| (Cell::new(0), loc)) .merge_join_by(instructions, |next_loc, next_insn| { assert!( next_loc.1.address >= next_insn.loc.address, "Debug location {:08x} doesn't match instruction address!", next_loc.1.address ); if next_loc.1.address < next_insn.loc.address + next_insn.loc.size { next_loc.0.set(next_insn.loc.address); true } else { false } }) .map(|result| match result { // Extract the address from the Either, for easier comparison in the next step. 
Either::Left((address, loc)) => (address.get(), Either::Left(loc)), Either::Right(insn) => (insn.loc.address, Either::Right(insn)), }); // Now the outer merge: labels and locs_and_instructions. let labels = self.text_labels.iter(); labels .merge_join_by( locs_and_instructions, |&label_addr, (right_addr, _)| match label_addr.cmp(right_addr) { Ordering::Less => panic!("Label {label_addr:08x} doesn't match exact address!"), Ordering::Equal => true, Ordering::Greater => false, }, ) .flat_map(|result| -> Box> { match result { Either::Left(label) => { Box::new(self.dbg.symbols.get_all(*label).map(Statement::Label)) } Either::Right((_, Either::Left(loc))) => { Box::new(std::iter::once(Statement::DebugLoc { file: loc.file, line: loc.line, col: loc.col, })) } Either::Right((_, Either::Right(insn))) => { Box::new(std::iter::once(Statement::Instruction { op: insn.op, args: WrappedArgs { args: &insn.args, symbol_table: &self.dbg.symbols, }, })) } } }) } fn prover_data_bounds(&self) -> (u32, u32) { self.prover_data_bounds } fn start_function(&self) -> impl AsRef { self.dbg.symbols.get_one(self.entry_point) } } /// The instruction arguments for code generation. Needs the symbol table to /// translate addresses to labels in the output code. 
struct WrappedArgs<'a> { args: &'a HighLevelArgs, symbol_table: &'a SymbolTable, } impl InstructionArgs for WrappedArgs<'_> { type Error = String; fn l(&self) -> Result, Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::CodeLabel(addr), rd: None, rs1: None, rs2: None, } => Ok(self.symbol_table.get_one(*addr).to_string()), _ => Err(format!("Expected: label, got {:?}", self.args)), } } fn r(&self) -> Result { match self.args { HighLevelArgs { imm: HighLevelImmediate::None, rd: None, rs1: Some(rs1), rs2: None, } => Ok(Register::new(*rs1 as u8)), _ => Err(format!("Expected: rs1, got {:?}", self.args)), } } fn rri(&self) -> Result<(Register, Register, u32), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::Value(imm), rd: Some(rd), rs1: Some(rs1), rs2: None, } => Ok(( Register::new(*rd as u8), Register::new(*rs1 as u8), *imm as u32, )), _ => Err(format!("Expected: rd, rs1, imm, got {:?}", self.args)), } } fn rrr(&self) -> Result<(Register, Register, Register), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::None, rd: Some(rd), rs1: Some(rs1), rs2: Some(rs2), } => Ok(( Register::new(*rd as u8), Register::new(*rs1 as u8), Register::new(*rs2 as u8), )), _ => Err(format!("Expected: rd, rs1, rs2, got {:?}", self.args)), } } fn rrr2(&self) -> Result<(Register, Register, Register), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::None, rd: Some(rd), rs1: Some(rs1), rs2: Some(rs2), } => Ok(( Register::new(*rd as u8), Register::new(*rs2 as u8), Register::new(*rs1 as u8), )), _ => Err(format!("Expected: rd, rs2, rs1, got {:?}", self.args)), } } fn ri(&self) -> Result<(Register, u32), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::Value(imm), rd: Some(rd), rs1: None, rs2: None, } => Ok((Register::new(*rd as u8), *imm as u32)), _ => Err(format!("Expected: rd, imm, got {:?}", self.args)), } } fn rr(&self) -> Result<(Register, Register), Self::Error> { match 
self.args { HighLevelArgs { imm: HighLevelImmediate::None, rd: Some(rd), rs1: Some(rs1), rs2: None, } => Ok((Register::new(*rd as u8), Register::new(*rs1 as u8))), _ => Err(format!("Expected: rd, rs1, got {:?}", self.args)), } } fn rrl( &self, ) -> Result<(Register, Register, impl AsRef), ::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::CodeLabel(addr), rd: None, rs1: Some(rs1), rs2: Some(rs2), } => Ok(( Register::new(*rs1 as u8), Register::new(*rs2 as u8), self.symbol_table.get_one(*addr).to_string(), )), _ => Err(format!("Expected: rs1, rs2, label, got {:?}", self.args)), } } fn rl(&self) -> Result<(Register, impl AsRef), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::CodeLabel(addr), rd: None, rs1: Some(rs1), rs2: None, } => Ok(( Register::new(*rs1 as u8), self.symbol_table.get_one(*addr).to_string(), )), HighLevelArgs { imm: HighLevelImmediate::CodeLabel(addr), rd: Some(rd), rs1: None, rs2: None, } => Ok(( Register::new(*rd as u8), self.symbol_table.get_one(*addr).into(), )), _ => Err(format!("Expected: {{rs1|rd}}, label, got {:?}", self.args)), } } fn rro(&self) -> Result<(Register, Register, u32), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::Value(imm), rd: Some(rd), rs1: Some(rs1), rs2: None, } => Ok(( Register::new(*rd as u8), Register::new(*rs1 as u8), *imm as u32, )), HighLevelArgs { imm: HighLevelImmediate::Value(imm), rd: None, rs1: Some(rs1), rs2: Some(rs2), } => Ok(( Register::new(*rs2 as u8), Register::new(*rs1 as u8), *imm as u32, )), _ => Err(format!( "Expected: {{rd, rs1 | rs2, rs1}}, imm, got {:?}", self.args )), } } fn empty(&self) -> Result<(), Self::Error> { match self.args { HighLevelArgs { imm: HighLevelImmediate::None, rd: None, rs1: None, rs2: None, } => Ok(()), _ => Err(format!("Expected: no args, got {:?}", self.args)), } } } /// Indexes the program sections by their virtual address. /// /// Allows for querying if an address is in a data or text section. 
pub struct AddressMap<'a>(BTreeMap); impl AddressMap<'_> { fn is_in_data_section(&self, addr: u32) -> bool { self.get_section_of_addr(addr) .is_some_and(|section| !section.is_executable()) } fn is_in_text_section(&self, addr: u32) -> bool { self.get_section_of_addr(addr) .is_some_and(ProgramHeader::is_executable) } fn get_section_of_addr(&self, addr: u32) -> Option<&ProgramHeader> { // Get the latest section that starts before the address. let section = self .0 .range(..=addr) .next_back() .map(|(_, §ion)| section)?; if addr > section.p_vaddr as u32 + section.p_memsz as u32 { // The address is after the end of the section. None } else { Some(section) } } } #[derive(Debug)] enum Data { TextLabel(u32), Value(u32), } fn load_data_section(mut addr: u32, data: &[u8], data_map: &mut BTreeMap) { for word in data.chunks(4) { let mut padded = [0; 4]; padded[..word.len()].copy_from_slice(word); let value = u32::from_le_bytes(padded); if value != 0 { data_map.insert(addr, Data::Value(value)); } else { // We don't need to store zero values, as they are implicit. } addr += 4; } } enum UnimpOrInstruction { Unimp16, Unimp32, Instruction(Ins), } impl UnimpOrInstruction { fn len(&self) -> u32 { match self { UnimpOrInstruction::Unimp16 => 2, UnimpOrInstruction::Unimp32 => 4, UnimpOrInstruction::Instruction(ins) => match ins.extension { Extensions::C => 2, _ => 4, }, } } } struct MaybeInstruction { address: u32, insn: UnimpOrInstruction, } #[derive(Debug)] enum HighLevelImmediate { None, CodeLabel(u32), Value(i32), } #[derive(Debug)] struct HighLevelArgs { rd: Option, rs1: Option, rs2: Option, imm: HighLevelImmediate, } /// The default args are all empty. 
impl Default for HighLevelArgs {
    fn default() -> Self {
        HighLevelArgs {
            rd: None,
            rs1: None,
            rs2: None,
            imm: HighLevelImmediate::None,
        }
    }
}

/// Address and encoded size (in bytes) of an instruction in the text section.
#[derive(Debug)]
struct Location {
    address: u32,
    size: u32,
}

/// A lifted (pseudo)instruction: opcode mnemonic, operands, and the location
/// of the original encoding(s) it was lifted from.
#[derive(Debug)]
struct HighLevelInsn {
    loc: Location,
    op: &'static str,
    args: HighLevelArgs,
}

/// Either a shared or an exclusive borrow of a `T`, depending on whether the
/// holder only reads or also updates the value.
enum ReadOrWrite<'a, T> {
    Read(&'a T),
    Write(&'a mut T),
}

// NOTE(review): some generic parameter lists in this block appear to have
// been lost in extraction (e.g. `BTreeSet` is presumably `BTreeSet<u32>`,
// and `TwoOrOneMapper` presumably takes type parameters); confirm against
// the original source.
struct InstructionLifter<'a> {
    // Addresses of R_RISCV_HI20 relocations, used to recognize absolute
    // address loads in non-PIE code.
    rellocs_set: &'a BTreeSet,
    address_map: &'a AddressMap<'a>,
    // Write mode collects referenced text addresses as a side effect; read
    // mode only consults the previously collected set.
    referenced_text_addrs: ReadOrWrite<'a, BTreeSet>,
}

impl InstructionLifter<'_> {
    /// Tries to fuse a `{lui|auipc} rd_ui, hi` + `addi rd_addi, rd_ui, lo`
    /// pair into a single `la` (when the composed value is a text address)
    /// or `li` pseudoinstruction.
    ///
    /// `insn2_addr` is the address of the second instruction of the pair;
    /// returns `None` when the two instructions must be kept separate.
    fn composed_immediate(
        &self,
        hi: i32,
        lo: i32,
        rd_ui: usize,
        rd_addi: usize,
        insn2_addr: u32,
        is_address: bool,
    ) -> Option<(&'static str, HighLevelArgs)> {
        let immediate = hi.wrapping_add(lo);
        let is_ref_to_text = is_address
            && self.address_map.is_in_text_section(immediate as u32)
            // This is very sad: sometimes the global pointer lands in the
            // middle of the text section, so we have to make an exception when
            // setting the gp (x3).
            && rd_addi != 3;
        let (op, imm) = if is_ref_to_text {
            // If rd_ui != rd_addi, we don't set rd_ui, thus our behavior is not
            // conformant, but it is probably fine for compiler generated code,
            // and it has worked so far.
            ("la", HighLevelImmediate::CodeLabel(immediate as u32))
        } else if rd_ui == rd_addi {
            if let ReadOrWrite::Read(referenced_text_addrs) = &self.referenced_text_addrs {
                if referenced_text_addrs.contains(&insn2_addr) {
                    // We can't join the two instructions because there is a
                    // jump to the second. Let each one be handled separately.
                    return None;
                }
            }
            ("li", HighLevelImmediate::Value(immediate))
        } else {
            // This pair of instructions leaks rd_ui. Since this is not a
            // reference to text, we can afford to be more conformant and handle
            // each instruction separately.
            return None;
        };
        Some((
            op,
            HighLevelArgs {
                rd: Some(rd_ui as u32),
                imm,
                ..Default::default()
            },
        ))
    }
}

impl TwoOrOneMapper for InstructionLifter<'_> {
    /// Tries to fuse two consecutive instructions into one pseudoinstruction
    /// (`li`/`la`, an inlined syscall, `jal`/`tail`, or an AUIPC-based
    /// load). Returns `None` when the pair cannot or should not be fused,
    /// in which case each instruction is lifted individually by `map_one`.
    fn try_map_two(
        &mut self,
        insn1: &MaybeInstruction,
        insn2: &MaybeInstruction,
    ) -> Option {
        use UnimpOrInstruction::Instruction as I;
        // The fused instruction spans both original encodings.
        let loc = Location {
            address: insn1.address,
            size: insn1.insn.len() + insn2.insn.len(),
        };
        let insn2_addr = insn2.address;
        // `unimp` placeholders can never participate in a fusion.
        let (I(insn1), I(insn2)) = (&insn1.insn, &insn2.insn) else {
            return None;
        };
        let result = match (insn1, insn2) {
            (
                // li rd, immediate
                Ins {
                    opc: Op::LUI,
                    rd: Some(rd_lui),
                    imm: Some(hi),
                    ..
                },
                Ins {
                    opc: Op::ADDI,
                    rd: Some(rd_addi),
                    rs1: Some(rs1_addi),
                    imm: Some(lo),
                    ..
                },
            ) if rd_lui == rs1_addi => {
                // Sometimes, in non-PIE code, this pair of instructions is used
                // to load an address into a register. We must check if this is
                // the case, and if the address points to a text section, we
                // must load it from a label.
                let is_address = self.rellocs_set.contains(&loc.address);
                let (op, args) =
                    self.composed_immediate(*hi, *lo, *rd_lui, *rd_addi, insn2_addr, is_address)?;
                HighLevelInsn { op, args, loc }
            }
            (
                // inline-able system call:
                // addi t0, x0, immediate
                // ecall
                Ins {
                    opc: Op::ADDI,
                    rd: Some(5),
                    rs1: Some(0),
                    imm: Some(opcode),
                    ..
                },
                Ins { opc: Op::ECALL, .. },
            ) => {
                // If this is not a know system call, we just let the executor deal with the problem.
                let syscall = u8::try_from(*opcode)
                    .ok()
                    .and_then(|opcode| Syscall::try_from(opcode).ok())?;
                HighLevelInsn {
                    loc,
                    op: syscall.name(),
                    args: Default::default(),
                }
            }
            (
                // All other double instructions we can lift start with auipc.
                Ins {
                    opc: Op::AUIPC,
                    rd: Some(rd_auipc),
                    imm: Some(hi),
                    ..
                },
                insn2,
            ) => {
                // AUIPC's immediate is PC-relative: fold in the current address.
                let hi = hi.wrapping_add(loc.address as i32);
                match insn2 {
                    // la rd, symbol
                    Ins {
                        opc: Op::ADDI,
                        rd: Some(rd_addi),
                        rs1: Some(rs1_addi),
                        imm: Some(lo),
                        ..
                    } if rd_auipc == rs1_addi => {
                        // AUIPC obviously always refer to an address.
                        const IS_ADDRESS: bool = true;
                        let (op, args) = self.composed_immediate(
                            hi,
                            *lo,
                            *rd_auipc,
                            *rd_addi,
                            insn2_addr,
                            IS_ADDRESS,
                        )?;
                        HighLevelInsn { op, args, loc }
                    }
                    // l{b|h|w}[u] rd, symbol
                    Ins {
                        opc: l_op,
                        rd: Some(rd_l),
                        rs1: Some(rs1_l),
                        rs2: None,
                        imm: Some(lo),
                        ..
                    } if matches!(l_op, Op::LB | Op::LH | Op::LW | Op::LBU | Op::LHU)
                        && rd_auipc == rd_l
                        && rd_l == rs1_l =>
                    {
                        // We don't support code introspection, so it is better
                        // to panic if this is the case:
                        let addr = hi.wrapping_add(*lo);
                        assert!(!self.address_map.is_in_text_section(addr as u32));
                        HighLevelInsn {
                            op: l_op.to_string(),
                            args: HighLevelArgs {
                                rd: Some(*rd_l as u32),
                                rs1: Some(0), // this is x0 because the entire address is in the immediate
                                imm: HighLevelImmediate::Value(addr),
                                ..Default::default()
                            },
                            loc,
                        }
                    }
                    // s{b|h|w} rd, symbol, rt
                    Ins {
                        opc: l_op,
                        rd: None,
                        rs1: Some(rt_l),
                        rs2: Some(_),
                        imm: Some(lo),
                        ..
                    } if matches!(l_op, Op::SB | Op::SH | Op::SW) && rd_auipc == rt_l => {
                        // We don't support code modification, so it is better
                        // to panic if this is the case:
                        let addr = hi.wrapping_add(*lo);
                        assert!(!self.address_map.is_in_text_section(addr as u32));
                        // Otherwise, this is a data store instruction. To be
                        // more conformant, it is better to let two
                        // instructions be handled separately.
                        return None;
                    }
                    // call offset
                    Ins {
                        opc: Op::JALR,
                        rd: Some(link_reg),
                        rs1: Some(hi_reg),
                        rs2: None,
                        imm: Some(lo),
                        ..
                    } if rd_auipc == hi_reg && hi_reg == link_reg => HighLevelInsn {
                        op: "jal",
                        args: HighLevelArgs {
                            imm: HighLevelImmediate::CodeLabel(hi.wrapping_add(*lo) as u32),
                            rd: Some(*link_reg as u32),
                            ..Default::default()
                        },
                        loc,
                    },
                    // tail offset
                    Ins {
                        opc: Op::JALR,
                        rd: Some(0),
                        rs1: Some(6),
                        rs2: None,
                        imm: Some(lo),
                        ..
                    } if *rd_auipc == 6 => HighLevelInsn {
                        op: "tail",
                        args: HighLevelArgs {
                            imm: HighLevelImmediate::CodeLabel(hi.wrapping_add(*lo) as u32),
                            ..Default::default()
                        },
                        loc,
                    },
                    _ => {
                        panic!(
                            "Unexpected instruction after AUIPC: {insn2:?} at {:08x}",
                            loc.address
                        );
                    }
                }
            }
            _ => return None,
        };
        // TODO: implement here other kinds of RISC-V fusions as optimization.

        // In write mode, record the text address referenced by the fused
        // instruction so a label can later be generated for it.
        if let (ReadOrWrite::Write(refs), HighLevelImmediate::CodeLabel(addr)) =
            (&mut self.referenced_text_addrs, &result.args.imm)
        {
            refs.insert(*addr);
        }
        Some(result)
    }

    /// Lifts a single instruction that was not part of a fused pair,
    /// translating jump targets to code labels and normalizing immediates.
    fn map_one(&mut self, insn: MaybeInstruction) -> HighLevelInsn {
        let loc = Location {
            address: insn.address,
            size: insn.insn.len(),
        };
        // `unimp` placeholders are emitted as-is.
        let UnimpOrInstruction::Instruction(insn) = insn.insn else {
            return HighLevelInsn {
                op: "unimp",
                args: Default::default(),
                loc,
            };
        };
        let mut imm = match insn.opc {
            // All jump instructions that have an address as immediate
            Op::JAL | Op::BEQ | Op::BNE | Op::BLT | Op::BGE | Op::BLTU | Op::BGEU => {
                // The immediate is PC-relative; resolve it to an absolute
                // address and record it as a label target (in write mode).
                let addr = (insn.imm.unwrap() + loc.address as i32) as u32;
                if let ReadOrWrite::Write(refs) = &mut self.referenced_text_addrs {
                    refs.insert(addr);
                }
                HighLevelImmediate::CodeLabel(addr)
            }
            // We currently only support standalone jalr if offset is zero
            Op::JALR => {
                assert!(
                    insn.imm.unwrap() == 0,
                    "jalr with non-zero offset is not supported"
                );
                HighLevelImmediate::Value(0)
            }
            // LUI is special because the decoder already shifts the immediate,
            // but the code gen expects it unshifted, so we have to undo.
            Op::LUI => HighLevelImmediate::Value(insn.imm.unwrap() >> 12),
            // We don't support arbitrary AUIPCs, but it is trivial to transform
            // one to an LI. If it passed the two-by-two transformation and got
            // here, this is a reference to data, so it is safe to transform it.
            Op::AUIPC => {
                return HighLevelInsn {
                    op: "li",
                    args: HighLevelArgs {
                        rd: insn.rd.map(|x| x as u32),
                        imm: HighLevelImmediate::Value(
                            insn.imm.unwrap().wrapping_add(loc.address as i32),
                        ),
                        ..Default::default()
                    },
                    loc,
                };
            }
            // All other instructions, which have the immediate as a value
            _ => match insn.imm {
                Some(imm) => HighLevelImmediate::Value(imm),
                None => HighLevelImmediate::None,
            },
        };
        // The acquire and release bits of an atomic instructions are decoded as
        // the immediate value, but we don't need the bits and an immediate is
        // not expected, so we must remove it.
        if let Extensions::A = insn.extension {
            imm = HighLevelImmediate::None;
        }
        // TODO: lift other instructions to their pseudoinstructions,
        // because they can have simplified implementations (like the
        // branch-zero variants and add to x0).
        HighLevelInsn {
            op: insn.opc.to_string(),
            args: HighLevelArgs {
                rd: insn.rd.map(|x| x as u32),
                rs1: insn.rs1.map(|x| x as u32),
                rs2: insn.rs2.map(|x| x as u32),
                imm,
            },
            loc,
        }
    }
}

/// Find all the references to text addresses in the instructions and add them
/// to the set.
fn search_text_addrs(
    base_addr: u32,
    data: &[u8],
    address_map: &AddressMap,
    rellocs_set: &BTreeSet,
    referenced_text_addrs: &mut BTreeSet,
) {
    // Same two-by-two lifting pass as `lift_instructions`, but run in write
    // mode: the lifted output is discarded and only the side effect of
    // collecting referenced text addresses is kept.
    try_map_two_by_two(
        RiscVInstructionIterator::new(base_addr, data),
        InstructionLifter {
            rellocs_set,
            address_map,
            referenced_text_addrs: ReadOrWrite::Write(referenced_text_addrs),
        },
    );
}

/// Lift the instructions back to higher-level instructions.
///
/// Turn addresses into labels and merge instructions into
/// pseudoinstructions.
// NOTE(review): some generic parameter lists here appear to have been lost in
// extraction (e.g. `-> Vec` is presumably `-> Vec<HighLevelInsn>` and
// `Option` presumably `Option<Self::Item>`); confirm against the original
// source.
fn lift_instructions(
    base_addr: u32,
    data: &[u8],
    address_map: &AddressMap,
    rellocs_set: &BTreeSet,
    referenced_text_addrs: &BTreeSet,
) -> Vec {
    // Same pass as `search_text_addrs`, but run in read mode: the previously
    // collected set of referenced text addresses is only consulted (to avoid
    // fusing across a jump target), and the lifted instructions are returned.
    try_map_two_by_two(
        RiscVInstructionIterator::new(base_addr, data),
        InstructionLifter {
            rellocs_set,
            address_map,
            referenced_text_addrs: ReadOrWrite::Read(referenced_text_addrs),
        },
    )
}

/// Decodes a raw text-section byte slice into a stream of
/// `MaybeInstruction`s, tracking each instruction's virtual address and
/// handling both 32-bit and compressed ("C" extension) 16-bit encodings.
struct RiscVInstructionIterator<'a> {
    // Virtual address of the next instruction to decode.
    curr_address: u32,
    // Bytes not yet decoded.
    remaining_data: &'a [u8],
}

impl RiscVInstructionIterator<'_> {
    fn new(base_addr: u32, data: &[u8]) -> RiscVInstructionIterator<'_> {
        RiscVInstructionIterator {
            curr_address: base_addr,
            remaining_data: data,
        }
    }
}

impl Iterator for RiscVInstructionIterator<'_> {
    type Item = MaybeInstruction;
    fn next(&mut self) -> Option {
        if self.remaining_data.is_empty() {
            return None;
        }
        // Decide if the next instruction is 32 bits or 16 bits ("C" extension):
        // (in RISC-V, the two lowest bits of a 32-bit encoding are both 1).
        let advance;
        let maybe_insn;
        if self.remaining_data[0] & 0b11 == 0b11 {
            // 32 bits
            advance = 4;
            let insn = u32::from_le_bytes(
                self.remaining_data[0..4]
                    .try_into()
                    .expect("Not enough bytes to complete a 32-bit instruction"),
            )
            .decode(Isa::Rv32);
            // When C extension is disabled, both LLVM and GNU binutils uses the
            // privileged instruction CSRRW to represent the `unimp` mnemonic.
            // https://groups.google.com/a/groups.riscv.org/g/sw-dev/c/Xu6UmcIAKIk/m/piJEHdBlAAAJ
            //
            // We must handle this case here.
            let insn = if let Ok(insn) = insn {
                if matches!(insn.opc, Op::CSRRW) {
                    UnimpOrInstruction::Unimp32
                } else {
                    UnimpOrInstruction::Instruction(insn)
                }
            } else {
                // Any undecodable 32-bit word is treated as `unimp`.
                UnimpOrInstruction::Unimp32
            };
            maybe_insn = MaybeInstruction {
                address: self.curr_address,
                insn,
            };
        } else {
            // 16 bits
            advance = 2;
            let bin_instruction = u16::from_le_bytes(
                self.remaining_data[0..2]
                    .try_into()
                    .expect("Not enough bytes to complete a 16-bit instruction"),
            );
            maybe_insn = MaybeInstruction {
                address: self.curr_address,
                insn: match bin_instruction.decode(Isa::Rv32) {
                    // Compressed instructions are normalized to their 32-bit
                    // equivalents so the rest of the lifter only sees one form.
                    Ok(c_insn) => UnimpOrInstruction::Instruction(to_32bit_equivalent(c_insn)),
                    Err(raki::decode::DecodingError::IllegalInstruction) => {
                        // Although not a real RISC-V instruction, sometimes 0x0000
                        // is used on purpose as an illegal instruction (it even has
                        // its own mnemonic "unimp"), so we support it here.
                        // Otherwise, there is something more fishy going on, and we
                        // panic.
                        // TODO: maybe we should just emit `unimp` for every unknown.
                        assert_eq!(
                            bin_instruction, 0,
                            "Failed to decode 16-bit instruction at {:08x}",
                            self.curr_address
                        );
                        UnimpOrInstruction::Unimp16
                    }
                    Err(err) => panic!(
                        "Unexpected decoding error at {:08x}: {err:?}",
                        self.curr_address
                    ),
                },
            };
        }
        // Advance the address and the data
        self.curr_address += advance;
        self.remaining_data = &self.remaining_data[advance as usize..];
        Some(maybe_insn)
    }
}

/// Translates an extension "C" instruction to the equivalent 32-bit instruction.
fn to_32bit_equivalent(mut insn: Ins) -> Ins { let new_opc = match insn.opc { Op::C_LW => Op::LW, Op::C_SW => Op::SW, Op::C_NOP => { return Ins { opc: Op::ADDI, rd: Some(0), rs1: Some(0), ..insn } } Op::C_ADDI | Op::C_ADDI16SP => Op::ADDI, Op::C_ADDI4SPN => { return Ins { opc: Op::ADDI, rs1: Some(2), // add to x2 (stack pointer) ..insn }; } Op::C_LI => { return Ins { opc: Op::ADDI, rs1: Some(0), ..insn } } Op::C_JAL => { return Ins { opc: Op::JAL, rd: Some(1), // output to x1 (return address) ..insn }; } Op::C_LUI => Op::LUI, Op::C_SRLI => Op::SRLI, Op::C_SRAI => Op::SRAI, Op::C_ANDI => Op::ANDI, Op::C_SUB => Op::SUB, Op::C_XOR => Op::XOR, Op::C_OR => Op::OR, Op::C_AND => Op::AND, Op::C_J => { return Ins { opc: Op::JAL, rd: Some(0), // discard output ..insn }; } Op::C_BEQZ => { return Ins { opc: Op::BEQ, rs2: Some(0), // compare with zero ..insn }; } Op::C_BNEZ => { return Ins { opc: Op::BNE, rs2: Some(0), // compare with zero ..insn }; } Op::C_SLLI => Op::SLLI, Op::C_LWSP => { return Ins { opc: Op::LW, rs1: Some(2), // load relative to x2 (stack pointer) ..insn }; } Op::C_JR => { return Ins { opc: Op::JALR, // discard the return address: rd: Some(0), // There is a binary value for rs2 in C.JR (set to 0), which is // returned by the decoder, but there isn't an equivalent to the // expanded JALR instruction, so we must set None here: rs2: None, imm: Some(0), ..insn }; } Op::C_MV => { return Ins { opc: Op::ADD, rs1: Some(0), // add to zero ..insn }; } Op::C_EBREAK => Op::EBREAK, Op::C_JALR => { return Ins { opc: Op::JALR, // output to x1 (return address): rd: Some(1), // There is a binary value for rs2 in C.JALR (set to 0), which // is returned by the decoder, but there isn't an equivalent to // the expanded JALR instruction, so we must set None here: rs2: None, imm: Some(0), // jump to the exact address ..insn }; } Op::C_ADD => Op::ADD, Op::C_SWSP => { return Ins { opc: Op::SW, rs1: Some(2), // store relative to x2 (stack pointer) ..insn }; } Op::C_LD | Op::C_SD | 
Op::C_ADDIW | Op::C_SUBW | Op::C_ADDW | Op::C_LDSP | Op::C_SDSP => { unreachable!("not a riscv32 instruction") } _ => unreachable!("not a RISC-V \"C\" extension instruction"), }; insn.opc = new_opc; insn } /// Helper trait for function `try_map_two_by_two`. /// /// Provides the methods to try to map two elements into one first, and one to /// one as fallback. trait TwoOrOneMapper { /// Tries to map two elements into one. If it fails, `map_one` is called. fn try_map_two(&mut self, first: &E, second: &E) -> Option; /// Maps one element individually. This one can not fail. fn map_one(&mut self, element: E) -> R; } /// Takes an iterator, and maps the elements two by two. If fails, maps /// individually. /// /// TODO: this would be more elegant as a generator, but they are unstable. fn try_map_two_by_two( input: impl Iterator, mut mapper: impl TwoOrOneMapper, ) -> Vec { let mut result = Vec::new(); let mut iter = input.peekable(); while let Some(first) = iter.next() { if let Some(second) = iter.peek() { if let Some(mapped) = mapper.try_map_two(&first, second) { result.push(mapped); iter.next(); } else { result.push(mapper.map_one(first)); } } else { result.push(mapper.map_one(first)); } } result } ================================================ FILE: riscv-elf/src/rv64.rs ================================================ use std::collections::BTreeSet; use std::fs; use std::path::Path; use goblin::elf::{ header::{EI_CLASS, EI_DATA, ELFCLASS64, ELFDATA2LSB, EM_RISCV}, Elf, }; use raki::{decode::Decode, instruction::OpcodeKind as Op, Isa}; /// Information about a jump destination #[derive(Debug, Clone)] pub struct JumpDest { /// The instruction address that generates this jump pub from_addr: u64, /// The instruction that generates this jump pub instruction: String, } /// Minimal RV64 ELF program representation for label/jumpdest collection pub struct Rv64Labels { /// All text labels and jump destinations pub jumpdests: BTreeSet, /// Entry point address pub entry_point: 
u64, /// Symbol table for debugging pub symbols: Vec<(u64, String)>, /// Jump destinations that are not symbols (address -> source instructions) pub jumpdests_with_debug_info: BTreeMap>, /// PC base (lowest executable address) pub pc_base: u64, } pub fn compute_jumpdests(file_name: &Path) -> Rv64Labels { log::info!("Loading RV64 ELF file: {}", file_name.display()); let file_buffer = fs::read(file_name).unwrap(); compute_jumpdests_from_buffer(&file_buffer) } pub fn compute_jumpdests_from_buffer(file_buffer: &[u8]) -> Rv64Labels { let elf = Elf::parse(file_buffer).unwrap(); // Verify it's a 64-bit RISC-V ELF assert_eq!( elf.header.e_ident[EI_CLASS], ELFCLASS64, "Only 64-bit ELF files are supported by rv64 module!" ); assert_eq!( elf.header.e_ident[EI_DATA], ELFDATA2LSB, "Only little-endian ELF files are supported!" ); assert_eq!( elf.header.e_machine, EM_RISCV, "Only RISC-V ELF files are supported!" ); let mut jumpdests = BTreeSet::new(); let mut jumpdests_with_debug_info = BTreeMap::new(); // Add entry point jumpdests.insert(elf.entry); // Find PC base (lowest executable address) let pc_base = elf .program_headers .iter() .filter(|ph| ph.is_executable()) .map(|ph| ph.p_vaddr) .min() .unwrap_or(0); // Collect symbols that are in text sections let mut symbols = Vec::new(); let mut symbol_addrs = BTreeSet::new(); for sym in elf.syms.iter() { if sym.st_value != 0 { // Check if this symbol is in an executable section let in_text = elf.program_headers.iter().any(|ph| { ph.is_executable() && sym.st_value >= ph.p_vaddr && sym.st_value < ph.p_vaddr + ph.p_memsz }); if in_text { jumpdests.insert(sym.st_value); symbol_addrs.insert(sym.st_value); if let Some(name) = elf.strtab.get_at(sym.st_name) { symbols.push((sym.st_value, name.to_string())); } } } } // Scan text sections for jump destinations for ph in elf.program_headers.iter() { if ph.is_executable() { let seg = &file_buffer[ph.p_offset as usize..(ph.p_offset + ph.p_filesz) as usize]; scan_for_jump_targets( ph.p_vaddr, 
seg, &mut jumpdests, &mut jumpdests_with_debug_info, &symbol_addrs, ); } } Rv64Labels { jumpdests, entry_point: elf.entry, symbols, jumpdests_with_debug_info, pc_base, } } use std::collections::BTreeMap; fn scan_for_jump_targets( base_addr: u64, data: &[u8], jumpdests: &mut BTreeSet, jumpdests_with_debug_info: &mut BTreeMap>, label_addrs: &BTreeSet, ) { data.chunks(4) // Cast to [u8; 4] .map(|data| data.try_into().unwrap()) .inspect(|data: &[u8; 4]| { assert!(data[0] & 0b11 == 0b11, "Expected 32-bit instruction"); }) .map(u32::from_le_bytes) // Decode the instruction bytes .map(|insn_bytes| { insn_bytes .decode(Isa::Rv64) .expect("Failed to decode instruction") }) // Remember the `rs1` and `imm` of the previous instruction if it was AUIPC, used to propagate it to the next JALR .scan(None, |previous_if_auipc, insn| { let previous_auipc_rs1 = std::mem::replace( previous_if_auipc, matches!(insn.opc, Op::AUIPC).then_some((insn.rs1, insn.imm)), ); Some((insn, previous_auipc_rs1)) }) .enumerate() .for_each(|(instruction_index, (insn, previous_if_auipc))| { let addr = base_addr + (instruction_index * 4) as u64; // Check for jump/branch instructions match insn.opc { Op::JAL => { // JAL has a PC-relative immediate if let Some(imm) = insn.imm { let target = (addr as i64 + imm as i64) as u64; jumpdests.insert(target); // Track non-symbol jumpdests if !label_addrs.contains(&target) { let jump_info = JumpDest { from_addr: addr, instruction: format!( "jal {}, 0x{:x}", insn.rd .map(|r| format!("x{r}")) .unwrap_or_else(|| "?".to_string()), target ), }; jumpdests_with_debug_info .entry(target) .or_default() .push(jump_info); } } } Op::BEQ | Op::BNE | Op::BLT | Op::BGE | Op::BLTU | Op::BGEU => { // Conditional branches have PC-relative immediates if let Some(imm) = insn.imm { let target = (addr as i64 + imm as i64) as u64; jumpdests.insert(target); // Track non-symbol jumpdests if !label_addrs.contains(&target) { let jump_info = JumpDest { from_addr: addr, instruction: format!( "{} 
{}, {}, 0x{:x}", format!("{:?}", insn.opc).to_lowercase(), insn.rs1 .map(|r| format!("x{r}")) .unwrap_or_else(|| "?".to_string()), insn.rs2 .map(|r| format!("x{r}")) .unwrap_or_else(|| "?".to_string()), target ), }; jumpdests_with_debug_info .entry(target) .or_default() .push(jump_info); } } } Op::JALR => { if let Some((rs1, imm)) = previous_if_auipc { // JALR with a preceding AUIPC if insn.rd == rs1 { // This is an AUIPC+JALR pair, we can resolve it statically if let (Some(auipc_imm), Some(jalr_imm)) = (imm, insn.imm) { let target = (addr as i64 + auipc_imm as i64 + jalr_imm as i64) as u64; jumpdests.insert(target); // Track non-symbol jumpdests if !label_addrs.contains(&target) { let jump_info = JumpDest { from_addr: addr, instruction: format!("auipc+jalr -> 0x{target:x}"), }; jumpdests_with_debug_info .entry(target) .or_default() .push(jump_info); } } } } else { // Standalone JALR without preceding AUIPC // These are dynamic jumps we can't resolve statically: // - Return instructions (jalr x0, x1, 0) // - Indirect calls through function pointers // - Computed jumps (switch statements, vtables) // We just note their existence for completeness let rs1_str = insn .rs1 .map(|r| format!("x{r}")) .unwrap_or_else(|| "?".to_string()); let rd_str = insn .rd .map(|r| format!("x{r}")) .unwrap_or_else(|| "?".to_string()); let imm = insn.imm.unwrap_or(0); // Only log if it's not a standard return (jalr x0, x1, 0) if !(insn.rd == Some(0) && insn.rs1 == Some(1) && imm == 0) { tracing::debug!( "Note: Dynamic jump at 0x{addr:x}: jalr {rd_str}, {rs1_str}, {imm}", ); } } } _ => {} }; }); } ================================================ FILE: riscv-types/Cargo.toml ================================================ [package] name = "powdr-riscv-types" description = "powdr RISCV types and traits" version.workspace = true edition.workspace = true license.workspace = true homepage.workspace = true repository.workspace = true [dependencies] powdr-isa-utils.workspace = true [lints] 
workspace = true [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: riscv-types/src/lib.rs ================================================ use powdr_isa_utils::SingleDataValue; use std::fmt; #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct Register { value: u8, } impl Register { pub fn new(value: u8) -> Self { Self { value } } pub fn is_zero(&self) -> bool { self.value == 0 } pub fn addr(&self) -> u8 { self.value } } /// List of machine registers, declared in the asm machine. /// NOTE: the bootloader expects the PC to be the last register in this list. pub const REGISTER_NAMES: [&str; 3] = ["main::query_arg_1", "main::query_arg_2", "main::pc"]; /// These are the names of the RISCV registers that are stored in memory. pub const REGISTER_MEMORY_NAMES: [&str; 37] = [ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", "tmp1", "tmp2", "tmp3", "tmp4", "lr_sc_reservation", ]; impl fmt::Display for Register { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", REGISTER_MEMORY_NAMES[self.value as usize]) } } impl From<&str> for Register { fn from(s: &str) -> Self { REGISTER_MEMORY_NAMES .iter() .position(|&name| name == s) .map(|value| Self::new(value as u8)) .unwrap_or_else(|| panic!("Invalid register")) } } pub enum Statement<'a, L: AsRef, A: InstructionArgs> { DebugLoc { file: u64, line: u64, col: u64 }, Label(L), Instruction { op: &'a str, args: A }, } pub struct MemEntry { pub label: Option, pub addr: u32, pub value: SingleDataValue, } pub struct SourceFileInfo<'a> { pub id: u32, pub dir: &'a str, pub file: &'a str, } /// A RISC-V program that can be translated to POWDR ASM. pub trait RiscVProgram { /// Takes the listing of source files, to be used in the debug statements. 
fn take_source_files_info(&mut self) -> impl Iterator>; /// Takes the initial memory snapshot. fn take_initial_mem(&mut self) -> impl Iterator; /// Takes the executable statements and labels. fn take_executable_statements( &mut self, ) -> impl Iterator, impl InstructionArgs>>; /// Returns the addresses of the start and end of prover data. fn prover_data_bounds(&self) -> (u32, u32); /// The name of the function that should be called to start the program. fn start_function(&self) -> impl AsRef; } pub trait InstructionArgs { type Error: fmt::Display; fn l(&self) -> Result, Self::Error>; fn r(&self) -> Result; fn rri(&self) -> Result<(Register, Register, u32), Self::Error>; /// Returns the usual rd, rs1, rs2 fn rrr(&self) -> Result<(Register, Register, Register), Self::Error>; /// Special case used in amo* instructions, returning rd, rs2, rs1 fn rrr2(&self) -> Result<(Register, Register, Register), Self::Error>; fn ri(&self) -> Result<(Register, u32), Self::Error>; fn rr(&self) -> Result<(Register, Register), Self::Error>; fn rrl(&self) -> Result<(Register, Register, impl AsRef), Self::Error>; fn rl(&self) -> Result<(Register, impl AsRef), Self::Error>; fn rro(&self) -> Result<(Register, Register, u32), Self::Error>; fn empty(&self) -> Result<(), Self::Error>; } ================================================ FILE: rust-toolchain.toml ================================================ [toolchain] channel = "nightly-2025-10-01" ================================================ FILE: scripts/analyze_nightly.py ================================================ #!/usr/bin/env python3 """ Nightly regression analyzer for benchmark results. This script analyzes the latest nightly benchmark results and compares them to the previous nightly run. It reports any performance regressions in APC (autoprecompile) configurations only, ignoring manual precompile results. 
Results are fetched from: https://github.com/powdr-labs/bench-results/tree/gh-pages/results """ import argparse from datetime import date import json import re import sys from dataclasses import dataclass from io import StringIO from typing import Optional from urllib.request import urlopen, Request from urllib.error import URLError, HTTPError import pandas as pd GITHUB_API_BASE = "https://api.github.com/repos/powdr-labs/bench-results" RAW_CONTENT_BASE = "https://raw.githubusercontent.com/powdr-labs/bench-results/gh-pages" # Benchmarks to analyze BENCHMARKS = ["keccak", "sha256", "pairing", "u256", "matmul", "ecc", "ecrecover", "reth"] # Date pattern for result directories (YYYY-MM-DD-HHMM) DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}-\d{4}$") # Pattern to extract APC count from config name (e.g., "apc030" -> 30) APC_PATTERN = re.compile(r"apc(\d+)") def is_apc_config(config: str) -> bool: """Check if a config uses APCs (apc count > 0).""" match = APC_PATTERN.search(config) if match: return int(match.group(1)) > 0 return False @dataclass class BenchmarkResult: """Holds the best result for a benchmark.""" benchmark: str best_config: str best_time_ms: float all_results: dict[str, float] @dataclass class ComparisonResult: """Holds the comparison between two benchmark runs.""" benchmark: str latest_time_ms: float latest_config: str previous_time_ms: float previous_config: str change_percent: float is_regression: bool config_changed: bool # True if best config differs between runs def fetch_url(url: str, headers: Optional[dict] = None) -> str: """Fetch content from a URL. Raises: URLError: If the URL cannot be reached. HTTPError: If the server returns an error status code. """ req = Request(url) if headers: for key, value in headers.items(): req.add_header(key, value) with urlopen(req, timeout=60) as response: return response.read().decode('utf-8') def get_results_directories() -> list[str]: """Get list of result directories from GitHub, sorted by date descending. 
Raises: URLError: If the GitHub API cannot be reached. HTTPError: If the GitHub API returns an error status code. json.JSONDecodeError: If the API response is not valid JSON. """ url = f"{GITHUB_API_BASE}/contents/results?ref=gh-pages" headers = {"Accept": "application/vnd.github.v3+json"} content = fetch_url(url, headers) entries = json.loads(content) # Filter to only date-formatted directories dirs = [ entry["name"] for entry in entries if entry["type"] == "dir" and DATE_PATTERN.match(entry["name"]) ] # Sort by date descending (lexicographic works for YYYY-MM-DD-HHMM format) dirs.sort(reverse=True) return dirs def fetch_benchmark_results(run_dir: str, benchmark: str) -> Optional[BenchmarkResult]: """Fetch and parse results for a specific benchmark from a run.""" url = f"{RAW_CONTENT_BASE}/results/{run_dir}/{benchmark}/basic_metrics.csv" try: content = fetch_url(url) except (URLError, HTTPError) as e: print(f"Warning: Could not fetch {benchmark} results from {run_dir}: {e}", file=sys.stderr) return None try: df = pd.read_csv(StringIO(content)) all_results: dict[str, float] = { str(row['filename']): float(row['total_proof_time_ms']) for _, row in df.iterrows() } # Only consider APC configs (apc count > 0), ignoring manual and baseline (apc000) apc_results = {k: v for k, v in all_results.items() if is_apc_config(k)} if not apc_results: return None # Find the best (lowest) total_proof_time_ms among APC configs best_config = min(apc_results, key=lambda k: apc_results[k]) best_time = apc_results[best_config] return BenchmarkResult( benchmark=benchmark, best_config=best_config, best_time_ms=best_time, all_results=all_results ) except (KeyError, ValueError) as e: print(f"Warning: Malformed CSV for {benchmark} in {run_dir}: {e}", file=sys.stderr) return None def compare_results( latest: BenchmarkResult, previous: BenchmarkResult, regression_threshold: float = 0.0 ) -> ComparisonResult: """Compare latest results to previous results.""" if previous.best_time_ms == 0: 
change_percent = 0.0 is_regression = False else: change_percent = ( (latest.best_time_ms - previous.best_time_ms) / previous.best_time_ms ) * 100 is_regression = change_percent > regression_threshold # Check if best config changed config_changed = latest.best_config != previous.best_config return ComparisonResult( benchmark=latest.benchmark, latest_time_ms=latest.best_time_ms, latest_config=latest.best_config, previous_time_ms=previous.best_time_ms, previous_config=previous.best_config, change_percent=change_percent, is_regression=is_regression, config_changed=config_changed, ) def print_error_report(error_msg: str) -> None: """Print a minimal error report to stdout.""" print("# Nightly Benchmark Comparison Report") print("") print("## Errors") print("") print(f"- {error_msg}") def format_change_percent(change: float) -> str: """Format a percentage change with appropriate sign.""" if change == 0.0: return "0.0%" elif change > 0: return f"+{change:.1f}%" else: return f"{change:.1f}%" def format_report( latest_run: str, previous_run: str, comparisons: list[ComparisonResult], errors: list[str], warnings: list[str] ) -> str: """Format the comparison report as markdown.""" lines: list[str] = [] def add_table_section(title: str, items: list[ComparisonResult]) -> None: """Add a markdown table section for comparison results.""" if not items: return lines.append(f"## {title}") lines.append("") lines.append("| Benchmark | Latest (ms) | Previous (ms) | Change |") lines.append("|-----------|-------------|---------------|--------|") for r in items: lines.append( f"| {r.benchmark} | {r.latest_time_ms:.0f} ({r.latest_config}) | " f"{r.previous_time_ms:.0f} ({r.previous_config}) | " f"{format_change_percent(r.change_percent)} |" ) lines.append("") lines.append("# Nightly Benchmark Comparison Report") lines.append("") lines.append(f"**Latest run:** {latest_run}") lines.append(f"**Previous run:** {previous_run}") lines.append("") if errors: lines.append("## Errors") lines.append("") 
for error in errors: lines.append(f"- {error}") lines.append("") if warnings: lines.append("## Warnings") lines.append("") for warning in warnings: lines.append(f"- {warning}") lines.append("") regressions = [c for c in comparisons if c.is_regression] improvements = [c for c in comparisons if c.change_percent < 0] stable = [c for c in comparisons if not c.is_regression and c.change_percent >= 0] add_table_section("Regressions", regressions) add_table_section("Improvements", improvements) add_table_section("Stable", stable) return "\n".join(lines) def main(): parser = argparse.ArgumentParser( description="Analyze nightly benchmark results and report regressions." ) parser.add_argument( "--regression-threshold", type=float, default=0.0, help="Percentage threshold above which a change is considered a regression (default: 0.0)" ) parser.add_argument( "--latest", type=str, help="Specific run directory to use as latest (default: auto-detect)" ) parser.add_argument( "--previous", type=str, help="Specific run directory to use as previous (default: auto-detect)" ) parser.add_argument( "--benchmarks", type=str, nargs="+", default=BENCHMARKS, help=f"Benchmarks to analyze (default: {' '.join(BENCHMARKS)})" ) parser.add_argument( "--output-format", choices=["markdown", "json"], default="markdown", help="Output format (default: markdown)" ) args = parser.parse_args() # Get result directories print("Fetching results directories...", file=sys.stderr) try: result_dirs = get_results_directories() except (URLError, HTTPError) as e: print_error_report(f"Could not fetch results directories: {e}") sys.exit(1) except json.JSONDecodeError as e: print_error_report(f"Failed to parse GitHub API response: {e}") sys.exit(1) if len(result_dirs) < 2: print_error_report("Need at least 2 result directories to compare") sys.exit(1) # Find today's run (must exist unless --latest is provided) if args.latest: latest_run = args.latest else: today = date.today().strftime("%Y-%m-%d") today_runs = [d for 
d in result_dirs if d.startswith(today)] if not today_runs: print_error_report(f"No results found for today ({today})") sys.exit(1) latest_run = today_runs[0] # Most recent run today (dirs are sorted descending) # Find previous run (most recent run that's not the latest) if args.previous: previous_run = args.previous else: previous_runs = [d for d in result_dirs if d != latest_run] if not previous_runs: print_error_report("No previous run found to compare against") sys.exit(1) previous_run = previous_runs[0] print(f"Comparing {latest_run} (latest) vs {previous_run} (previous)", file=sys.stderr) # Fetch results for each benchmark comparisons = [] errors = [] warnings = [] for benchmark in args.benchmarks: print(f"Analyzing {benchmark}...", file=sys.stderr) latest_result = fetch_benchmark_results(latest_run, benchmark) previous_result = fetch_benchmark_results(previous_run, benchmark) if latest_result is None: errors.append(f"{benchmark}: No APC results found in latest run") continue if previous_result is None: errors.append(f"{benchmark}: No APC results found in previous run") continue comparison = compare_results( latest_result, previous_result, args.regression_threshold ) comparisons.append(comparison) # Check for config changes if comparison.config_changed: warnings.append( f"{benchmark}: Best APC config changed from {comparison.previous_config} " f"to {comparison.latest_config}" ) # Generate report if args.output_format == "json": output = { "latest_run": latest_run, "previous_run": previous_run, "comparisons": [ { "benchmark": c.benchmark, "latest_time_ms": c.latest_time_ms, "latest_config": c.latest_config, "previous_time_ms": c.previous_time_ms, "previous_config": c.previous_config, "change_percent": c.change_percent, "is_regression": c.is_regression, "config_changed": c.config_changed, } for c in comparisons ], "errors": errors, "warnings": warnings, "has_regressions": any(c.is_regression for c in comparisons), "has_errors": len(errors) > 0, "has_warnings": 
len(warnings) > 0, } print(json.dumps(output, indent=2)) else: report = format_report(latest_run, previous_run, comparisons, errors, warnings) print(report) # Exit with error code if there are regressions or errors has_regressions = any(c.is_regression for c in comparisons) has_errors = len(errors) > 0 has_warnings = len(warnings) > 0 if has_errors: print("\nErrors were encountered during analysis.", file=sys.stderr) sys.exit(2) if has_regressions: print("\nRegressions detected!", file=sys.stderr) sys.exit(1) if has_warnings: print("\nWarnings were generated (see report).", file=sys.stderr) print("\nNo regressions detected.", file=sys.stderr) sys.exit(0) if __name__ == "__main__": main() ================================================ FILE: scripts/update-dep.sh ================================================ #!/bin/bash # Script to update openvm or stark-backend git revision hashes across the repository. # # Usage: # ./scripts/update-dep.sh openvm # ./scripts/update-dep.sh stark-backend # # Examples: # ./scripts/update-dep.sh openvm v1.5.0-powdr # ./scripts/update-dep.sh stark-backend v1.3.0-powdr set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" DEP_TYPE="$1" NEW_REV="$2" usage() { echo "Usage: $0 " echo "" echo "Examples:" echo " $0 openvm v1.5.0-powdr" echo " $0 stark-backend v1.3.0-powdr" echo "" echo "This script updates all git revision references for the specified dependency." 
exit 1 } if [[ -z "$DEP_TYPE" ]] || [[ -z "$NEW_REV" ]]; then usage fi case "$DEP_TYPE" in openvm) GREP_PATTERN='powdr-labs/openvm.git' GIT_URL='https://github.com/powdr-labs/openvm.git' ;; stark-backend) GREP_PATTERN='powdr-labs/stark-backend.git' GIT_URL='https://github.com/powdr-labs/stark-backend.git' ;; *) echo "Error: Unknown dependency type '$DEP_TYPE'" echo "" usage ;; esac echo "Updating $DEP_TYPE dependencies to: $NEW_REV" echo "" # Find all Cargo.toml files with the specified git dependencies # Store in an array to safely handle paths with spaces CARGO_FILES=() while IFS= read -r file; do [[ -n "$file" ]] && CARGO_FILES+=("$file") done < <(find "$REPO_ROOT" -name "Cargo.toml" -exec grep -l "$GREP_PATTERN" {} \; 2>/dev/null || true) if [[ ${#CARGO_FILES[@]} -eq 0 ]]; then echo "No Cargo.toml files with $DEP_TYPE dependencies found." exit 0 fi for file in "${CARGO_FILES[@]}"; do echo "Updating $file" # Update revisions # Match: rev = "..." after the git URL sed -i -E 's|(git = "'"$GIT_URL"'", rev = ")[^"]+(")|'"\1${NEW_REV}\2|g" "$file" done echo "" echo "Done! Updated the following files:" for file in "${CARGO_FILES[@]}"; do echo " - ${file#"$REPO_ROOT"/}" done echo "" echo "Please review the changes and run 'cargo check' to verify." ================================================ FILE: syscalls/Cargo.toml ================================================ [package] name = "powdr-syscalls" description = "powdr syscalls" version = { workspace = true } edition = { workspace = true } license = { workspace = true } homepage = { workspace = true } repository = { workspace = true } [dependencies] [lints] workspace = true [lib] bench = false # See https://github.com/bheisler/criterion.rs/issues/458 ================================================ FILE: syscalls/src/lib.rs ================================================ #![no_std] macro_rules! syscalls { ($(($num:expr, $identifier:ident, $name:expr, $input_count:expr, $output_count:expr)),* $(,)?) 
=> { /// We use repr(u8) to make sure the enum discriminant will fit into the /// 12 bits of the immediate field of the `addi` instruction, #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] #[repr(u8)] pub enum Syscall { $($identifier = $num),* } impl Syscall { pub const fn name(&self) -> &'static str { match self { $(Syscall::$identifier => $name),* } } pub const fn arity(&self) -> (u32, u32) { match self { $(Syscall::$identifier => ($input_count, $output_count)),* } } } impl core::fmt::Display for Syscall { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "{}", match self { $(Syscall::$identifier => $name),* }) } } impl core::str::FromStr for Syscall { type Err = (); fn from_str(input: &str) -> Result { match input { $($name => Ok(Syscall::$identifier)),*, _ => Err(()), } } } impl From for u8 { fn from(syscall: Syscall) -> Self { syscall as Self } } impl core::convert::TryFrom for Syscall { type Error = (); fn try_from(value: u8) -> Result { match value { $($num => Ok(Syscall::$identifier)),*, _ => Err(()), } } } } } // Generate `Syscall` enum with supported syscalls and their numbers. syscalls!( (1, Input, "input", 2, 1), (2, Output, "output", 2, 0), (3, PoseidonGL, "poseidon_gl", 1, 0), (4, Affine256, "affine_256", 4, 0), (5, EcAdd, "ec_add", 3, 0), (6, EcDouble, "ec_double", 2, 0), (7, KeccakF, "keccakf", 2, 0), (8, Mod256, "mod_256", 3, 0), (9, Halt, "halt", 0, 0), (10, Poseidon2GL, "poseidon2_gl", 2, 0), (11, NativeHash, "native_hash", 1, 0), (12, CommitPublic, "commit_public", 2, 0), (13, InvertGL, "invert_gl", 2, 2), (14, SplitGLVec, "split_gl_vec", 2, 0), (15, MergeGL, "merge_gl", 3, 0), );

    ,
// NOTE(review): several generic parameter lists in this region appear to have
// been stripped during extraction (e.g. `IndexedConstraintSystem`,
// `IsBusStateful`, `HashSet::::new()`, `collect::>()`); restore them from the
// original source before compiling.
) -> IndexedConstraintSystem {
    // Deduplicate interactions with non-stateful buses: the first occurrence
    // is kept, later identical ones are dropped.
    let mut seen = HashSet::new();
    constraint_system.retain_bus_interactions(|interaction| {
        // We only touch interactions with non-stateful buses.
        if let Some(bus_id) = interaction.bus_id.try_to_number() {
            if !bus_interaction_handler.is_stateful(bus_id) && !seen.insert(interaction.clone()) {
                return false;
            }
        }
        true
    });
    constraint_system
}

pub trait IsBusStateful {
    /// Returns true if the bus with the given ID is stateful, i.e., whether there is any
    /// interaction with the rest of the zkVM. Examples of stateful buses are memory and
    /// execution bridge. Examples of non-stateful buses are fixed lookups.
    fn is_stateful(&self, bus_id: T) -> bool;
}

/// Removes constraints that are factors of other constraints.
fn remove_redundant_constraints(
    constraint_system: IndexedConstraintSystem,
) -> IndexedConstraintSystem {
    // First, remove duplicate factors from the constraints.
    let mut constraint_system = remove_duplicate_factors(constraint_system);

    // Maps each factor to the set of constraints that contain it.
    let mut constraints_by_factor = HashMap::new();
    // Turns each constraint into a set of factors.
    let constraints_as_factors = constraint_system
        .algebraic_constraints()
        .iter()
        .enumerate()
        .map(|(i, c)| {
            let factors = c.expression.to_factors();
            assert!(!factors.is_empty());
            for f in &factors {
                constraints_by_factor
                    .entry(f.clone())
                    .or_insert_with(HashSet::new)
                    .insert(i);
            }
            factors
        })
        .collect_vec();

    let mut redundant_constraints = HashSet::::new();
    for (i, factors) in constraints_as_factors.iter().enumerate() {
        // Go through all factors `f` and compute the intersection of all
        // constraints in `constraints_by_factor[f]`. These constraints
        // are multiples of the current constraint, so they are redundant
        // if they are proper multiples, i.e. have at least one more factor.
        let mut redundant = factors
            .iter()
            .map(|f| constraints_by_factor[f].clone())
            .reduce(|a, b| a.intersection(&b).copied().collect())
            .unwrap();
        // Only remove constraints that have the same factors if their index
        // is larger than the current one.
        // Counting the factors is sufficient here.
        redundant.retain(|j| {
            let other_factors = &constraints_as_factors[*j];
            // This assertion can fail if `remove_duplicate_factors` is not called at the start of this function.
            assert!(other_factors.len() >= factors.len());
            other_factors.len() > factors.len() || *j > i
        });
        redundant_constraints.extend(redundant);
    }

    // Drop every constraint flagged as redundant, matching by running index.
    let mut counter = 0;
    constraint_system.retain_algebraic_constraints(|_| {
        let retain = !redundant_constraints.contains(&counter);
        counter += 1;
        retain
    });
    constraint_system
}

/// If a constraint contains the same factor multiple times removes the duplicate factors.
fn remove_duplicate_factors(
    mut constraint_system: IndexedConstraintSystem,
) -> IndexedConstraintSystem {
    // Constraints rebuilt from their unique factors; they replace the
    // originals that were dropped in the `retain` below.
    let mut constraint_to_add = vec![];
    constraint_system.retain_algebraic_constraints(|constraint| {
        let factors = constraint.expression.to_factors();
        assert!(!factors.is_empty());
        let factor_count = factors.len();
        let unique_factors = factors.into_iter().unique().collect_vec();
        if unique_factors.len() < factor_count {
            // Rebuild the constraint as the product of its unique factors.
            constraint_to_add.push(AlgebraicConstraint::assert_zero(
                unique_factors
                    .into_iter()
                    .reduce(|acc, factor| acc * factor)
                    .unwrap(),
            ));
            false
        } else {
            true
        }
    });
    constraint_system.add_algebraic_constraints(constraint_to_add);
    constraint_system
}

/// Removes derived variables that are not referenced by any constraint.
fn remove_unreferenced_derived_variables(
    mut constraint_system: IndexedConstraintSystem,
) -> IndexedConstraintSystem {
    // Note that `referenced_unknown_variables` only returns variables referenced in constraints.
    let referenced_variables = constraint_system
        .referenced_unknown_variables()
        .cloned()
        .collect::>();
    constraint_system.retain_derived_variables(|derived_var| {
        referenced_variables.contains(&derived_var.variable)
    });
    constraint_system
}


================================================
FILE: autoprecompiles/src/empirical_constraints.rs
================================================
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::hash::Hash;

use itertools::Itertools;
use serde::{Deserialize, Serialize};

pub use crate::equivalence_classes::{EquivalenceClass, Partition};
use crate::{
    adapter::Adapter,
    blocks::{PcStep, SuperBlock},
    expression::{AlgebraicExpression, AlgebraicReference},
    optimistic::{
        algebraic_references::BlockCellAlgebraicReferenceMapper,
        config::optimistic_precompile_config,
    },
    symbolic_machine::SymbolicConstraint,
};

/// "Constraints" that were inferred from execution statistics. They hold empirically
/// (most of the time), but are not guaranteed to hold in all cases.
#[derive(Serialize, Deserialize, Default, Debug)]
pub struct EmpiricalConstraints {
    /// For each program counter, the range constraints for each column.
    /// The range might not hold in 100% of cases.
    // NOTE(review): map value types below lost their generic arguments in
    // extraction; from usage they are per-column (min, max) ranges keyed by
    // pc — confirm against the original source.
    pub column_ranges_by_pc: BTreeMap>,
    /// For each basic block (identified by its starting PC), the equivalence classes of columns.
    pub equivalence_classes_by_block: BTreeMap>,
    pub debug_info: DebugInfo,
    /// Count of how many times each program counter was executed in the sampled executions.
    /// This can be used to set a threshold for applying constraints only to frequently executed PCs.
    pub pc_counts: BTreeMap,
}

/// Empirical constraints for a specific basic block.
pub struct BlockEmpiricalConstraints {
    /// The pcs this block executes
    pcs: Vec,
    /// For each program counter in the block, the range constraints for each column, if any.
    /// The range might not hold in 100% of cases.
pub column_ranges_by_pc: BTreeMap>, /// The equivalence classes of columns in the block. pub equivalence_classes: Partition, } /// Debug information mapping AIR ids to program counters and column names. #[derive(Serialize, Deserialize, Default, Debug, Clone)] pub struct DebugInfo { /// Mapping from program counter to the ID of the AIR implementing this instruction. pub air_id_by_pc: BTreeMap, /// Mapping from AIR ID to column names. pub column_names_by_air_id: BTreeMap>, } impl EmpiricalConstraints { /// Combines the empirical constraints with another set of empirical constraints. /// The resulting constraints are the most conservative combination of both. pub fn combine_with(&mut self, other: EmpiricalConstraints) { // Combine column ranges by PC for (pc, ranges) in other.column_ranges_by_pc { self.column_ranges_by_pc .entry(pc) .and_modify(|existing_ranges| { for (i, (min, max)) in ranges.iter().enumerate() { if let Some((existing_min, existing_max)) = existing_ranges.get_mut(i) { *existing_min = (*existing_min).min(*min); *existing_max = (*existing_max).max(*max); } } }) .or_insert(ranges); } // Combine equivalence classes by block for (block_pc, classes) in other.equivalence_classes_by_block { // Compute the new equivalence classes for this block let new_equivalence_class = match self.equivalence_classes_by_block.entry(block_pc) { Entry::Vacant(_) => classes, Entry::Occupied(e) => e.remove().intersected_with(classes), }; assert!(self .equivalence_classes_by_block .insert(block_pc, new_equivalence_class) .is_none()); } self.debug_info.combine_with(other.debug_info); // Combine pc counts for (pc, count) in other.pc_counts { *self.pc_counts.entry(pc).or_insert(0) += count; } } /// Extracts the empirical constraints relevant for a specific basic block. 
pub fn for_block(&self, block: &SuperBlock) -> BlockEmpiricalConstraints { let pcs = block.pcs().collect_vec(); let column_ranges_by_pc = pcs .iter() .filter_map(|pc| { self.column_ranges_by_pc .get(&(*pc as u32)) .cloned() .map(|ranges| (*pc as u32, ranges.into_iter().enumerate().collect())) }) .collect(); let bb_independent_equivalence_classes = block .instruction_indexed_start_pcs() .into_iter() .map(|(insn_idx, bb_pc)| { self.equivalence_classes_by_block .get(&bb_pc) .cloned() .unwrap_or_default() // shift instructions indices according to index in super block .map_elements(|mut elem| { elem.instruction_idx += insn_idx; elem }) }); let equivalence_classes = bb_independent_equivalence_classes .into_iter() .reduce(|bb1, bb2| bb1.combine(bb2)) .unwrap(); BlockEmpiricalConstraints { pcs, column_ranges_by_pc, equivalence_classes, } } /// Returns a new `EmpiricalConstraints` instance containing only the constraints /// (both range and equivalence) that are based on a number of executions greater /// than or equal to a threshold passed in the `POWDR_OP_EXECUTION_COUNT_THRESHOLD` /// environment variable (or `DEFAULT_EXECUTION_COUNT_THRESHOLD`). /// This should mitigate overfitting to rare execution paths. pub fn apply_pc_threshold(self) -> Self { let threshold = optimistic_precompile_config().execution_count_threshold; EmpiricalConstraints { column_ranges_by_pc: self .column_ranges_by_pc .into_iter() .filter(|(pc, _)| self.pc_counts.get(pc).cloned().unwrap_or(0) >= threshold) .collect(), equivalence_classes_by_block: self .equivalence_classes_by_block .into_iter() .filter(|&(block_pc, _)| { // For equivalence classes, it is enough to check the pc_counts of the first // instruction in the block, as all other instruction will be executed at least // as often. 
self.pc_counts.get(&(block_pc as u32)).cloned().unwrap_or(0) >= threshold }) .collect(), pc_counts: self.pc_counts.clone(), debug_info: self.debug_info.clone(), } } } impl BlockEmpiricalConstraints { /// Returns a new `BlockEmpiricalConstraints` instance containing only the /// constraints (both range and equivalence) for which the provided /// predicate on `BlockCell`s returns true. pub fn filtered(self, predicate: impl Fn(&BlockCell) -> bool) -> Self { let column_ranges_by_pc = self .column_ranges_by_pc .into_iter() .map(|(pc, ranges)| { // with superblocks, there might be multiple instructions with the same PC let pc_instruction_indices = self .pcs .iter() .enumerate() .filter(|(_, &opc)| opc == pc as u64) .map(|(idx, _)| idx) .collect_vec(); let ranges = ranges .into_iter() .enumerate() .filter_map(|(col_idx, range)| { // check that the predicate holds in all matching instructions pc_instruction_indices .iter() .all(|idx| predicate(&BlockCell::new(*idx, col_idx))) .then_some(range) }) .collect(); (pc, ranges) }) .collect(); let equivalence_classes = self .equivalence_classes .to_classes() .into_iter() .map(|class| { // Remove cells from the equivalence class for which the predicate does not hold class .into_iter() .filter(|cell| predicate(cell)) .collect_vec() }) .collect(); Self { pcs: self.pcs, column_ranges_by_pc, equivalence_classes, } } } impl DebugInfo { pub fn combine_with(&mut self, other: DebugInfo) { merge_maps(&mut self.air_id_by_pc, other.air_id_by_pc); merge_maps( &mut self.column_names_by_air_id, other.column_names_by_air_id, ); } pub fn take(&mut self) -> Self { Self { air_id_by_pc: std::mem::take(&mut self.air_id_by_pc), column_names_by_air_id: std::mem::take(&mut self.column_names_by_air_id), } } } /// Merges two maps, asserting that existing keys map to equal values. 
// NOTE(review): angle-bracketed generic parameter lists appear to have been
// stripped from this chunk by text extraction (e.g. `BTreeMap` below is
// missing its `<K, V>` parameters, and `Vec>` is missing the element type).
// Restore the exact parameters from version control rather than guessing.
//
// Inserts all entries of `map2` into `map1`; panics (via `assert_eq!`) if a
// key is present in both maps with different values.
fn merge_maps(map1: &mut BTreeMap, map2: BTreeMap) {
    for (key, value) in map2 {
        match map1.entry(key) {
            Entry::Vacant(v) => {
                v.insert(value);
            }
            Entry::Occupied(existing) => {
                assert_eq!(*existing.get(), value,);
            }
        }
    }
}

#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Debug, Copy, Clone)]
pub struct BlockCell {
    /// Instruction index within the basic block
    instruction_idx: usize,
    /// The column index within the instruction air
    column_idx: usize,
}

impl BlockCell {
    pub fn new(instruction_idx: usize, column_idx: usize) -> Self {
        Self {
            instruction_idx,
            column_idx,
        }
    }
}

/// Generates symbolic constraints based on empirical constraints for a given block.
pub struct ConstraintGenerator<'a, A: Adapter> {
    empirical_constraints: BlockEmpiricalConstraints,
    algebraic_references: BlockCellAlgebraicReferenceMapper,
    block: &'a SuperBlock,
}

impl<'a, A: Adapter> ConstraintGenerator<'a, A> {
    /// Creates a new `ConstraintGenerator`.
    ///
    /// Arguments:
    /// - `empirical_constraints`: The empirical constraints to use.
    /// - `algebraic_references`: The mapping from block cells to algebraic references.
    /// - `block`: The block for which to generate constraints.
    pub fn new(
        empirical_constraints: BlockEmpiricalConstraints,
        algebraic_references: BlockCellAlgebraicReferenceMapper,
        block: &'a SuperBlock,
    ) -> Self {
        Self {
            empirical_constraints,
            algebraic_references,
            block,
        }
    }

    // Looks up the algebraic reference for a block cell; panics if the mapper
    // has no entry for it (an invariant violation for this block).
    fn get_algebraic_reference(&self, block_cell: &BlockCell) -> AlgebraicReference {
        self.algebraic_references
            .get_algebraic_reference(block_cell)
            .cloned()
            .unwrap_or_else(|| {
                panic!(
                    "Missing reference in block {:?}: {block_cell:?}",
                    self.block.start_pcs()
                )
            })
    }

    /// Generates all equality constraints
    pub fn generate_constraints(&self) -> Vec> {
        self.range_constraints()
            .into_iter()
            .chain(self.equivalence_constraints())
            .collect_vec()
    }

    /// Generates constraints of the form `var = ` for columns whose value is
    /// always the same empirically.
    // TODO: We could also enforce looser range constraints.
    // This is a bit more complicated though, because we'd have to add bus interactions
    // to actually enforce them.
    fn range_constraints(&self) -> Vec> {
        let mut constraints = Vec::new();
        for (idx, pc) in self.block.pcs().enumerate() {
            let pc = pc as u32;
            let Some(range_constraints) = self.empirical_constraints.column_ranges_by_pc.get(&pc)
            else {
                continue;
            };
            for (col_index, (min, max)) in range_constraints {
                let block_cell = BlockCell::new(idx, *col_index);
                // Only a degenerate range (min == max) can be expressed as a
                // plain equality constraint; wider ranges are skipped (see TODO).
                if min == max {
                    let value = A::PowdrField::from(*min as u64);
                    let reference = self.get_algebraic_reference(&block_cell);
                    constraints.push(EqualityConstraint {
                        left: EqualityExpression::Reference(reference),
                        right: EqualityExpression::Number(value),
                    });
                }
            }
        }
        constraints
    }

    // For each empirical equivalence class, emit `first = other` constraints
    // linking every other member to the class representative (its first cell).
    fn equivalence_constraints(&self) -> Vec> {
        let mut constraints = Vec::new();
        for equivalence_class in self.empirical_constraints.equivalence_classes.to_classes() {
            let first = equivalence_class.first().unwrap();
            let first_ref = self.get_algebraic_reference(first);
            for other in equivalence_class.iter().skip(1) {
                let other_ref = self.get_algebraic_reference(other);
                constraints.push(EqualityConstraint {
                    left: EqualityExpression::Reference(first_ref.clone()),
                    right: EqualityExpression::Reference(other_ref.clone()),
                });
            }
        }
        constraints
    }
}

/// An expression used in equality constraints.
/// This is a simplified version of `AlgebraicExpression` that only allows
/// references and numbers.
// NOTE(review): the generic parameter list of this enum (presumably `<T>`)
// was stripped by extraction — `Number(T)` below still refers to it.
pub enum EqualityExpression {
    Reference(AlgebraicReference),
    Number(T),
}

impl From> for AlgebraicExpression {
    fn from(expr: EqualityExpression) -> Self {
        match expr {
            EqualityExpression::Reference(r) => AlgebraicExpression::Reference(r),
            EqualityExpression::Number(n) => AlgebraicExpression::Number(n),
        }
    }
}

/// An equality constraint between two `EqualityExpression`s.
// NOTE(review): generic parameter lists were stripped from this chunk by text
// extraction (e.g. `EqualityExpression` without `<T>`, `Partition` without
// `<T>`, `BTreeSet`/`HashMap` without their parameters). Restore from VCS.
pub struct EqualityConstraint {
    pub left: EqualityExpression,
    pub right: EqualityExpression,
}

// An equality `left = right` is encoded as the constraint `left - right = 0`.
impl From> for SymbolicConstraint {
    fn from(constraint: EqualityConstraint) -> Self {
        SymbolicConstraint {
            expr: AlgebraicExpression::from(constraint.left)
                - AlgebraicExpression::from(constraint.right),
        }
    }
}

================================================
FILE: autoprecompiles/src/equivalence_classes.rs
================================================
use std::collections::{BTreeSet, HashMap};
use std::hash::Hash;

use derivative::Derivative;
use itertools::Itertools;
use rayon::prelude::*;
use serde::{Deserialize, Deserializer, Serialize, Serializer};

/// An equivalence class, i.e, a set of values of type `T` which are considered equivalent
pub type EquivalenceClass = BTreeSet;

/// A collection of equivalence classes where all classes are guaranteed to have at least two elements.
/// This is enforced by construction of this type only happening through collection, where we ignore empty and singleton classes.
///
/// Internally represented as a map from element to class ID for efficient intersection operations.
/// Serializes as Vec> for JSON compatibility (JSON requires string keys in objects).
#[derive(Debug, Clone, Derivative)]
#[derivative(Default(bound = ""))]
pub struct Partition {
    /// Maps each element to its class ID (0..num_classes)
    /// If an element is not present, it is in a singleton class.
    class_of: HashMap,
    /// Number of classes
    num_classes: usize,
}

impl Serialize for Partition {
    fn serialize(&self, serializer: S) -> Result {
        // Serialize as Vec> for JSON compatibility
        self.to_classes().serialize(serializer)
    }
}

impl<'de, T: Eq + Hash + Deserialize<'de>> Deserialize<'de> for Partition {
    fn deserialize>(deserializer: D) -> Result {
        // Deserialize from Vec>, reusing FromIterator logic
        let classes: Vec> = Vec::deserialize(deserializer)?;
        Ok(classes.into_iter().collect())
    }
}

impl> FromIterator for Partition
where
    C::IntoIter: ExactSizeIterator,
{
    fn from_iter>(iter: I) -> Self {
        let mut class_of = HashMap::new();
        let mut num_classes = 0;
        for class in iter {
            let class_iter = class.into_iter();
            // Ignore classes with 0 or 1 elements as they are useless
            if class_iter.len() > 1 {
                for element in class_iter {
                    // Panics if an element appears in more than one input class.
                    assert!(class_of.insert(element, num_classes).is_none());
                }
                num_classes += 1;
            }
        }
        Self {
            class_of,
            num_classes,
        }
    }
}

impl Partition {
    /// Returns all equivalence classes as a Vec>.
    /// Singleton classes are omitted.
    /// This is O(n) where n is the number of elements.
    #[allow(clippy::iter_over_hash_type)] // Order within classes doesn't matter semantically
    pub fn to_classes(&self) -> Vec> {
        let mut classes: Vec> = vec![Vec::new(); self.num_classes];
        for (elem, &class_id) in &self.class_of {
            classes[class_id].push(elem.clone());
        }
        classes
    }

    /// Intersects multiple partitions of the same universe into a single partition.
    /// In other words, two elements are in the same equivalence class in the resulting partition
    /// if and only if they are in the same equivalence class in all input partitions.
    /// Singleton equivalence classes are omitted from the result.
    pub fn intersect_many(partitions: impl IntoIterator) -> Self {
        // Pairwise intersection: fold over partitions, intersecting two at a time.
        // This is more efficient than building Vec signatures because:
        // 1. We only hash (usize, usize) tuples instead of Vec
        // 2. The result shrinks after each intersection, making later steps faster
        partitions
            .into_iter()
            .reduce(Partition::intersected_with)
            // Panics on an empty iterator.
            .expect("expected at least one element")
    }

    /// Intersects two partitions.
    pub fn intersected_with(self, other: Self) -> Self {
        // Group elements by (class_in_self, class_in_other)
        // Elements with the same pair end up in the same result class
        self.class_of
            .into_iter()
            // Note that if an element is not in self or other, it is a
            // singleton and will also not be in the intersection.
            .filter_map(|(elem, class_a)| {
                other
                    .class_of
                    .get(&elem)
                    .map(|&class_b| ((class_a, class_b), elem.clone()))
            })
            .into_group_map()
            .into_values()
            // The FromIterator impl drops any classes that shrank to one element.
            .collect()
    }

    /// Combine two partitions of disjoint universes into a single partition.
    /// Elements from the two partitions must also not Eq collide.
    pub fn combine(mut self, other: Self) -> Self {
        // Shift `other`'s class IDs past `self`'s so the ID spaces stay disjoint.
        let class_shift = self.num_classes;
        #[allow(clippy::iter_over_hash_type)]
        for (elem, class) in other.class_of {
            if self.class_of.insert(elem, class + class_shift).is_some() {
                panic!("Partition combine element collision");
            }
        }
        self.num_classes += other.num_classes;
        self
    }

    /// Modify elements while keeping their original class.
    /// The mapped elements must not Eq collide with each other.
    pub fn map_elements T2>(self, f: F) -> Partition {
        let mut new_class_of: HashMap = Default::default();
        #[allow(clippy::iter_over_hash_type)]
        for (elem, class) in self.class_of {
            if new_class_of.insert(f(elem), class).is_some() {
                panic!("Partition element mapping collision");
            }
        }
        Partition:: {
            class_of: new_class_of,
            num_classes: self.num_classes,
        }
    }
}

/// Number of partitions to combine in each chunk before parallelizing.
const CHUNK_SIZE: usize = 64;

impl Partition {
    /// Intersects multiple partitions in parallel using a chunked tree reduction.
    ///
    /// Partitions are grouped into chunks, each chunk is intersected sequentially,
    /// then the chunk results are combined recursively in parallel.
    pub fn parallel_intersect(partitions: impl IndexedParallelIterator) -> Self {
        if partitions.len() <= CHUNK_SIZE {
            // Base case: We only have one chunk, intersect sequentially
            let partitions = partitions.collect::>();
            return Self::intersect_many(partitions);
        }
        // Chunk partitions and intersect each chunk in parallel
        let chunk_results = partitions
            .chunks(CHUNK_SIZE)
            .map(Self::intersect_many)
            // Not collecting here causes the type checker to hit the recursion limit...
            .collect::>();
        // Recursively combine chunk results
        Self::parallel_intersect(chunk_results.into_par_iter())
    }
}

/// Equality implementation that converts to canonical form for comparison.
/// This is intentionally simple (not optimized) since it's only used in tests.
impl PartialEq for Partition {
    fn eq(&self, other: &Self) -> bool {
        self.to_canonical() == other.to_canonical()
    }
}

impl Eq for Partition {}

impl Partition {
    /// Converts to a canonical BTreeSet> form for equality comparison.
    fn to_canonical(&self) -> BTreeSet> {
        self.to_classes()
            .into_iter()
            .map(|class| class.into_iter().collect())
            .collect()
    }
}

#[cfg(test)]
mod tests {
    use crate::equivalence_classes::Partition;

    // Test helper: build a Partition from explicit classes.
    fn partition(sets: Vec>) -> Partition {
        sets.into_iter().collect()
    }

    #[test]
    fn test_intersect_partitions() {
        let partition1 = partition(vec![
            // Two classes: {1,2,3,4} and {5,6,7,8,9}
            vec![1, 2, 3, 4],
            vec![5, 6, 7, 8, 9],
        ]);
        let partition2 = partition(vec![
            // Classes: {2,3}, {4,5}, {6,7,8} (1 and 9 are singletons)
            vec![2, 3],
            vec![4, 5],
            vec![6, 7, 8],
        ]);
        let partition3 = partition(vec![
            // Classes: {2,3}, {6,7}, {8,9} (splits {6,7,8} into {6,7} and {8})
            vec![2, 3],
            vec![6, 7],
            vec![8, 9],
        ]);
        let result = Partition::intersect_many([partition1, partition2, partition3]);
        // After intersecting all three:
        // - {2,3} survives (in same class in all three)
        // - {6,7} survives (6,7,8 in p2 intersected with 6,7 in p3)
        // - 8 becomes singleton (was with 6,7 in p2, but with 9 in p3, and 9 not in p1's class)
        let expected = partition(vec![vec![2, 3], vec![6, 7]]);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_default_partition_yields_no_classes() {
        // The default partition puts every element in its own singleton class,
        // which are omitted in the list of equivalence classes.
        let partition: Partition = Partition::default();
        assert_eq!(partition.to_classes().len(), 0);
    }

    #[test]
    fn test_map_elements() {
        let p = partition(vec![vec![1u32, 2], vec![3, 4]]);
        let mapped: Partition = p.map_elements(|x| x.to_string());
        let expected: Partition = vec![vec!["1", "2"], vec!["3", "4"]]
            .into_iter()
            .map(|v| v.into_iter().map(str::to_string))
            .collect();
        assert_eq!(mapped, expected);
    }

    #[test]
    #[should_panic(expected = "Partition element mapping collision")]
    fn test_map_elements_panics_on_collision() {
        let p = partition(vec![vec![1, 2]]);
        p.map_elements(|_| 0u32);
    }

    #[test]
    fn test_combine() {
        let p1 = partition(vec![vec![1, 2], vec![3, 4]]);
        let p2 = partition(vec![vec![5, 6], vec![7, 8]]);
        let combined = p1.combine(p2);
        let expected = partition(vec![vec![1, 2], vec![3, 4], vec![5, 6], vec![7, 8]]);
        assert_eq!(combined, expected);
    }

    #[test]
    #[should_panic(expected = "Partition combine element collision")]
    fn test_combine_panics_on_collision() {
        let p1 = partition(vec![vec![1, 2]]);
        let p2 = partition(vec![vec![1, 3]]);
        p1.combine(p2);
    }
}

================================================
FILE: autoprecompiles/src/evaluation.rs
================================================
use std::{fmt::Display, iter::Sum, ops::Add, sync::Arc};

use crate::{
    adapter::{Adapter, AdapterApc, AdapterApcWithStats},
    InstructionHandler, SymbolicMachine,
};
use serde::{Deserialize, Serialize};

#[derive(Clone, Copy, PartialEq, Default, Eq, Debug, Serialize, Deserialize)]
/// Statistics of an AIR
pub struct AirStats {
    /// The number of main columns
    pub main_columns: usize,
    /// The number of polynomial constraints
    pub constraints: usize,
    /// The number of bus interactions. Note that in some proof systems, they might
    /// translate to a number of columns. The exact number depends on many factors,
    /// including the degree of the bus interaction fields, which is not measured here.
    pub bus_interactions: usize,
}

impl AirStats {
    // Collect the three counts directly from a symbolic machine.
    pub fn new(machine: &SymbolicMachine) -> Self {
        Self {
            main_columns: machine.main_columns().count(),
            constraints: machine.constraints.len(),
            bus_interactions: machine.bus_interactions.len(),
        }
    }
}

// Component-wise addition, so stats of several AIRs can be accumulated.
impl Add for AirStats {
    type Output = AirStats;

    fn add(self, rhs: AirStats) -> AirStats {
        AirStats {
            main_columns: self.main_columns + rhs.main_columns,
            constraints: self.constraints + rhs.constraints,
            bus_interactions: self.bus_interactions + rhs.bus_interactions,
        }
    }
}

impl Sum for AirStats {
    fn sum>(iter: I) -> AirStats {
        iter.fold(AirStats::default(), Add::add)
    }
}

#[derive(Clone, Copy, Serialize, Deserialize)]
/// Evaluation result of an APC evaluation
pub struct EvaluationResult {
    /// Statistics before optimizations, i.e., the sum of the AIR stats
    /// of all AIRs that *would* be involved in proving this block
    /// if it was run in software.
    pub before: AirStats,
    /// The AIR stats of the APC.
    pub after: AirStats,
}

/// Evaluate an APC by comparing its cost to the cost of executing the original instructions in software.
/// This is used by different pgo strategies in different stages. For example, for cell PGO, this is done before selection, and for instruction PGO, it is done after.
// NOTE(review): this function's generic parameter list (likely `<A: Adapter>`,
// given the `A::` paths in the body) was stripped by text extraction, as were
// several other angle-bracketed lists below. Restore from version control.
pub fn evaluate_apc(
    instruction_handler: &A::InstructionHandler,
    apc: AdapterApc,
) -> AdapterApcWithStats {
    // "before": sum of the software AIR stats of every instruction in the block.
    let before = apc
        .block
        .instructions()
        .map(|(_, instruction)| instruction_handler.get_instruction_air_stats(instruction))
        .sum();
    // "after": stats of the single APC machine.
    let after = AirStats::new(apc.machine());
    let evaluation_result = EvaluationResult { before, after };
    let apc = Arc::new(apc);
    let apc_stats = A::apc_stats(apc.clone(), instruction_handler);
    AdapterApcWithStats::::new(apc, apc_stats, evaluation_result)
}

impl Display for EvaluationResult {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let EvaluationResult { before, after } = self;
        write!(
            f,
            "APC advantage:\n - Main columns: {}\n - Bus interactions: {}\n - Constraints: {}",
            render_stat(before.main_columns, after.main_columns),
            render_stat(before.bus_interactions, after.bus_interactions),
            render_stat(before.constraints, after.constraints)
        )
    }
}

// Formats a before/after pair as "X -> Y (Zx reduction)".
// NOTE(review): if `after` is 0 the ratio is inf/NaN; presumably APCs always
// have at least one column/constraint — confirm.
fn render_stat(before: usize, after: usize) -> String {
    let effectiveness = before as f64 / after as f64;
    format!("{before} -> {after} ({effectiveness:.2}x reduction)")
}

================================================
FILE: autoprecompiles/src/execution/ast.rs
================================================
use std::iter;

use itertools::Itertools;
use powdr_expression::visitors::{AllChildren, Children};
use serde::{Deserialize, Serialize};

use crate::{execution::ExecutionState, powdr::UniqueReferences};

#[derive(Debug, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Eq, Clone)]
pub struct OptimisticConstraint {
    pub left: OptimisticExpression,
    pub right: OptimisticExpression,
}

impl Children> for OptimisticConstraint {
    fn children(&self) -> Box> + '_> {
        Box::new([&self.left, &self.right].into_iter())
    }

    fn children_mut(&mut self) -> Box> + '_> {
        Box::new([&mut self.left, &mut self.right].into_iter())
    }
}

impl<
        'a,
        A: 'a + Copy + PartialEq + Eq + std::hash::Hash,
        V: 'a,
        E: AllChildren>,
    > UniqueReferences<'a, (A, V), OptimisticLiteral> for E
{
    // Collects each distinct literal appearing anywhere in the expression tree,
    // deduplicated via `Itertools::unique`.
    fn unique_references(&'a self) -> impl Iterator> {
        self.all_children()
            .filter_map(|e| {
                if let OptimisticExpression::Literal(r) = e {
                    Some(*r)
                } else {
                    None
                }
            })
            .unique()
    }
}

impl AllChildren> for OptimisticExpression {
    fn all_children(&self) -> Box> + '_> {
        Box::new(iter::once(self).chain(self.children().flat_map(|e| e.all_children())))
    }
}

#[derive(Debug, Clone, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Eq)]
pub enum OptimisticExpression {
    Number(V),
    Literal(OptimisticLiteral),
}

impl OptimisticExpression {
    // Both variants are leaves, so there are never any children.
    fn children(&self) -> Box> + '_> {
        match self {
            OptimisticExpression::Literal(_) | OptimisticExpression::Number(_) => {
                Box::new(iter::empty())
            }
        }
    }
}

#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Eq, Hash,
)]
pub enum LocalOptimisticLiteral {
    /// A register limb value. Limbs are indexed in little-endian order.
    RegisterLimb(A, usize),
    Pc,
}

// Dropping the limb index turns a literal into the fetch needed to resolve it.
impl From> for LocalFetch {
    fn from(value: LocalOptimisticLiteral) -> Self {
        match value {
            LocalOptimisticLiteral::RegisterLimb(a, _) => Self::Register(a),
            LocalOptimisticLiteral::Pc => Self::Pc,
        }
    }
}

#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Eq, Hash,
)]
pub enum LocalFetch {
    Register(A),
    Pc,
}

impl LocalFetch {
    // Reads the value this fetch denotes from the given execution state.
    pub fn get>(&self, state: &E) -> E::Value {
        match self {
            LocalFetch::Register(a) => state.reg(a),
            LocalFetch::Pc => state.pc(),
        }
    }
}

#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Eq, Hash,
)]
pub struct Fetch {
    pub instr_idx: usize,
    pub val: LocalFetch,
}

impl From> for Fetch {
    fn from(value: OptimisticLiteral) -> Self {
        Self {
            instr_idx: value.instr_idx,
            val: value.val.into(),
        }
    }
}

#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Eq, Hash,
)]
pub struct OptimisticLiteral {
    pub instr_idx: usize,
    pub val: LocalOptimisticLiteral,
}

================================================
FILE: autoprecompiles/src/execution/candidates.rs
================================================
use std::cmp::Ordering;

use itertools::Itertools;

use crate::execution::{
    evaluator::OptimisticConstraintFailed, ExecutionState, OptimisticConstraintEvaluator,
    OptimisticConstraints,
};

/// An APC candidate tracker
/// During execution, it keeps track of possible parts of the trace that can be assigned to APCs
// NOTE(review): generic parameter lists in this file were stripped by text
// extraction (e.g. `ApcCandidates` without its parameters, `Vec>` below,
// `impl, S>` further down). Restore the exact lists from version control.
pub struct ApcCandidates {
    apcs: Vec,
    candidates: Vec>,
}

/// A selected APC call
#[derive(
    Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize, deepsize2::DeepSizeOf,
)]
pub struct ApcCall {
    /// The index of the APC that this call runs
    pub apc_id: usize,
    /// A snapshot before this APC
    pub from: S,
    /// A snapshot after this APC
    pub to: S,
}

impl, S> ApcCandidates {
    pub fn new(apcs: Vec) -> Self {
        Self {
            apcs,
            candidates: Default::default(),
        }
    }

    /// Given the current state of execution, retain the candidates whose constraints are still
    /// verified
    pub fn check_conditions(&mut self, state: &E, snapshot_callback: impl Fn() -> S) {
        // Filter out failing candidates and upgrade the ones that are done
        self.candidates
            .retain_mut(|candidate| match &mut candidate.status {
                // Check the conditions for unconfirmed candidates
                CandidateStatus::InProgress(optimistic_constraint_evaluator) => {
                    if optimistic_constraint_evaluator
                        .try_next_execution_step(
                            state,
                            self.apcs[candidate.apc_id].optimistic_constraints(),
                        )
                        .is_err()
                    {
                        // A constraint failed: drop the candidate.
                        return false;
                    }
                    // If we went through the whole block, confirm it
                    if candidate.total_check_count
                        == optimistic_constraint_evaluator.instruction_index()
                    {
                        candidate.status = CandidateStatus::Done(Clocked::new(
                            state.global_clk(),
                            snapshot_callback(),
                        ));
                    }
                    true
                }
                // Already-done candidates are kept untouched.
                _ => true,
            });
    }

    /// Abort all candidates that are in progress.
    /// This is useful at the end of a segment, where some candidates being in progress block other candidates that are done from being extracted.
    /// Since we reached the end of the segment, we know that the candidates that are in progress will not be valid, so it's safe to drop them.
    pub fn abort_in_progress(&mut self) -> Vec {
        self.candidates
            .extract_if(.., |f| matches!(f.status, CandidateStatus::InProgress(_)))
            .map(|candidate| candidate.apc_id)
            .collect()
    }

    /// If no more candidates are in progress, return a set of non-overlapping calls
    pub fn extract_calls(&mut self) -> Vec> {
        let are_any_in_progress = self
            .candidates
            .iter()
            .any(|c| matches!(c.status, CandidateStatus::InProgress(_)));
        // If any candidates are in progress, return nothing
        if are_any_in_progress {
            return vec![];
        }
        // Now we have no more candidates in progress
        // We need to solve conflicts to make sure we do not return overlapping candidates
        // Collect metadata needed to resolve overlaps in a single pass
        let meta = self.candidates.iter().enumerate().map(|(idx, candidate)| {
            let range = Self::candidate_range(candidate);
            (
                CandidateRank {
                    candidate_id: idx,
                    len: range.1 - range.0,
                    priority: self.apcs[candidate.apc_id].priority(),
                },
                range,
            )
        });
        // Find which candidates to discard by going through all pairs
        let discard = meta.tuple_combinations().fold(
            vec![false; self.candidates.len()],
            |mut discard, ((rank_left, range_left), (rank_right, range_right))| {
                let (rank_left, range_left) = (rank_left, range_left);
                let (rank_right, range_right) = (rank_right, range_right);
                let idx_left = rank_left.candidate_id;
                let idx_right = rank_right.candidate_id;
                // If one of the two is already discarded, or they do not overlap, do nothing
                if discard[idx_left]
                    || discard[idx_right]
                    || !Self::ranges_overlap(range_left, range_right)
                {
                    return discard;
                }
                // Otherwise, discard the one with lower priority
                match rank_left.cmp(&rank_right) {
                    Ordering::Greater => discard[idx_right] = true,
                    Ordering::Less => discard[idx_left] = true,
                    Ordering::Equal => unreachable!("by construction, two ranks cannot be equal"),
                }
                discard
            },
        );
        // Keep all candidates that were not marked as discarded
        self.candidates
            .drain(..)
            .zip_eq(discard)
            .filter_map(|(candidate, discard)| (!discard).then_some(candidate))
            .map(|candidate| {
                // Safe: the early return above guarantees no candidate is InProgress.
                let CandidateStatus::Done(to) = candidate.status else {
                    unreachable!()
                };
                ApcCall {
                    apc_id: candidate.apc_id,
                    from: candidate.from_snapshot.snapshot,
                    to: to.snapshot,
                }
            })
            .collect()
    }

    /// Try to insert a new candidate.
    /// This can fail if the current state is incompatible with the optimistic constraints of the candidate
    pub fn try_insert(
        &mut self,
        state: &E,
        apc_id: usize,
        snapshot: impl Fn() -> S,
    ) -> Result<(), OptimisticConstraintFailed> {
        let apc_candidate = {
            let apc = &self.apcs[apc_id];
            let mut evaluator = OptimisticConstraintEvaluator::new();
            // First check happens here, against the state *before* the block runs.
            evaluator.try_next_execution_step(state, apc.optimistic_constraints())?;
            Ok(ApcCandidate {
                total_check_count: apc.cycle_count() + 1,
                apc_id,
                from_snapshot: Clocked::new(state.global_clk(), snapshot()),
                status: CandidateStatus::InProgress(evaluator),
            })
        }?;
        self.candidates.push(apc_candidate);
        Ok(())
    }

    // Global-clk range covered by a candidate. Must only be called once the
    // candidate is Done.
    fn candidate_range(candidate: &ApcCandidate) -> (usize, usize) {
        let start = candidate.from_snapshot.global_clk;
        let end = match &candidate.status {
            CandidateStatus::Done(snapshot) => snapshot.global_clk,
            CandidateStatus::InProgress(_) => {
                unreachable!("candidate_range called on candidate still in progress")
            }
        };
        (start, end)
    }

    // Half-open interval overlap test: ranges that merely touch at an endpoint
    // do not count as overlapping.
    fn ranges_overlap((start_a, end_a): (usize, usize), (start_b, end_b): (usize, usize)) -> bool {
        start_a < end_b && start_b < end_a
    }
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct CandidateRank {
    /// Priority of this candidate. Higher is better.
    priority: usize,
    /// Length (number of cycles) covered by this candidate. Higher is better.
    len: usize,
    /// Index of the candidate within the current list. Lower is better.
    candidate_id: usize,
}

impl Ord for CandidateRank {
    // Order: higher priority wins, then longer span, then *lower* candidate_id
    // (note the reversed comparison on candidate_id).
    fn cmp(&self, other: &Self) -> Ordering {
        self.priority
            .cmp(&other.priority)
            .then_with(|| self.len.cmp(&other.len))
            .then_with(|| other.candidate_id.cmp(&self.candidate_id))
    }
}

impl PartialOrd for CandidateRank {
    fn partial_cmp(&self, other: &Self) -> Option {
        Some(self.cmp(other))
    }
}

#[derive(Debug)]
struct ApcCandidate {
    /// The total number of steps to run
    /// This is the number of steps plus one, because we check the state before and after
    total_check_count: usize,
    /// The id of the apc candidate being run
    pub apc_id: usize,
    /// The state of the execution when this candidate was introduced
    pub from_snapshot: Clocked,
    /// The status of this candidate
    pub status: CandidateStatus,
}

#[derive(Debug)]
struct Clocked {
    global_clk: usize,
    snapshot: S,
}

impl Clocked {
    fn new(global_clk: usize, snapshot: S) -> Self {
        Self {
            global_clk,
            snapshot,
        }
    }
}

/// A trait to represent APCs at execution time
pub trait Apc {
    /// Gets a reference to the optimistic constraints
    fn optimistic_constraints(&self) -> &OptimisticConstraints;
    /// The number of cycles to go through this APC
    fn cycle_count(&self) -> usize;
    /// Larger priority wins when APC execution ranges overlap.
    fn priority(&self) -> usize;
}

#[derive(Debug)]
enum CandidateStatus {
    /// We don't know yet if this apc candidate is valid. The conditions must be verified
    InProgress(OptimisticConstraintEvaluator),
    /// We know the candidate is valid until the given `Snapshot`
    Done(Clocked),
}

#[cfg(test)]
mod tests {
    use crate::execution::{OptimisticConstraint, OptimisticExpression, OptimisticLiteral};

    use super::*;

    #[derive(Default, Clone, PartialEq, Debug)]
    struct TestApc {
        priority: usize,
        len: usize,
        optimistic_constraints: OptimisticConstraints<(), usize>,
    }

    impl TestApc {
        fn a(len: usize) -> Self {
            Self {
                len,
                ..Default::default()
            }
        }

        fn p(mut self, priority: usize) -> Self {
            self.priority = priority;
            self
        }

        fn c(mut self, optimistic_constraints: OptimisticConstraints<(), usize>) -> Self {
            self.optimistic_constraints = optimistic_constraints;
            self
        }
    }

    fn a(len: usize) -> TestApc {
        TestApc::a(len)
    }

    impl Apc for TestApc {
        fn cycle_count(&self) -> usize {
            self.len
        }

        fn priority(&self) -> usize {
            self.priority
        }

        fn optimistic_constraints(&self) -> &OptimisticConstraints<(), usize> {
            &self.optimistic_constraints
        }
    }

    #[derive(Clone, Copy, PartialEq, Debug, Default)]
    struct TestExecutionState {
        pc: usize,
        instret: usize,
    }

    impl TestExecutionState {
        fn incr(&mut self) {
            self.jump(self.pc + 1)
        }

        fn jump(&mut self, pc: usize) {
            self.pc = pc;
            self.instret += 1;
        }

        fn snapshot(&self) -> TestSnapshot {
            TestSnapshot {
                instret: self.instret,
            }
        }
    }

    impl ExecutionState for TestExecutionState {
        type RegisterAddress = ();
        type Value = usize;

        fn value_limb(_: Self::Value, _: usize) -> Self::Value {
            todo!("Limbs are currently untested")
        }

        fn pc(&self) -> Self::Value {
            self.pc
        }

        fn reg(&self, _address: &Self::RegisterAddress) -> Self::Value {
            todo!("Constraints on register values is currently untested")
        }

        fn global_clk(&self) -> usize {
            self.instret
        }
    }

    #[derive(Clone, PartialEq, Debug, Copy)]
    struct TestSnapshot {
        instret: usize,
    }

    fn s(instret: usize) -> TestSnapshot {
        TestSnapshot { instret }
    }

    struct TestVm {
        state: TestExecutionState,
        candidates: ApcCandidates,
    }

    impl TestVm {
        fn try_add_candidate(&mut self, apc_id: usize) -> Result<(), OptimisticConstraintFailed> {
            self.candidates
                .try_insert(&self.state, apc_id, || self.state.snapshot())
        }

        // A helper function to go to the next execution state, check the conditions on it, and extract the calls
        fn incr(&mut self) -> Vec> {
            self.state.incr();
            self.candidates
                .check_conditions(&self.state, || self.state.snapshot());
            self.candidates.extract_calls()
        }

        fn jump(&mut self, pc: usize) -> Vec> {
            self.state.jump(pc);
            self.candidates
                .check_conditions(&self.state, || self.state.snapshot());
            self.candidates.extract_calls()
        }

        fn count_done(&self) -> usize {
            self.candidates
                .candidates
                .iter()
                .filter(|c| matches!(c.status, CandidateStatus::Done(_)))
                .count()
        }

        fn count_in_progress(&self) -> usize {
            self.candidates
                .candidates
                .iter()
                .filter(|c| matches!(c.status, CandidateStatus::InProgress(_)))
                .count()
        }

        fn new(apcs: impl IntoIterator) -> Self {
            Self {
                state: Default::default(),
                candidates: ApcCandidates::new(apcs.into_iter().collect()),
            }
        }
    }

    #[test]
    fn single_candidate() {
        // an apc with 3 steps
        let mut vm = TestVm::new([a(3).p(1)]);
        let snapshot = s(0);
        let final_snapshot = s(3);
        // it will be checked in 4 steps, because we have conditions on the state before and after. The first check is included in `try_insert`.
        vm.try_add_candidate(0).unwrap();
        assert!(vm.incr().is_empty());
        assert!(vm.incr().is_empty());
        let output = vm.incr();
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id: 0,
                from: snapshot,
                to: final_snapshot,
            }
        );
    }

    #[test]
    fn single_candidate_final_state_failure() {
        // single apc with a constraint that fails on the final (step 2) state
        let failing_constraints =
            OptimisticConstraints::from_constraints(vec![OptimisticConstraint {
                left: OptimisticExpression::Literal(OptimisticLiteral {
                    instr_idx: 2,
                    val: crate::execution::LocalOptimisticLiteral::Pc,
                }),
                right: OptimisticExpression::Number(99),
            }]);
        let apc = a(2).p(1).c(failing_constraints);
        let mut vm = TestVm::new([apc]);
        vm.try_add_candidate(0).unwrap();
        assert!(vm.incr().is_empty());
        assert_eq!(vm.count_in_progress(), 1);
        let extracted = vm.incr();
        assert!(extracted.is_empty());
        assert_eq!(vm.count_in_progress(), 0);
        assert_eq!(vm.count_done(), 0);
    }

    #[test]
    fn two_candidates_same_length() {
        // insert two apcs with 3 steps each, but different priority
        let low_priority = a(3).p(1);
        let high_priority = a(3).p(2);
        let mut vm = TestVm::new([low_priority, high_priority]);
        let low_priority_id = 0;
        let high_priority_id = 1;
        let snapshot = s(0);
        let final_snapshot = s(3);
        vm.try_add_candidate(low_priority_id).unwrap();
        vm.try_add_candidate(high_priority_id).unwrap();
        assert!(vm.incr().is_empty());
        assert!(vm.incr().is_empty());
        let output = vm.incr();
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id: high_priority_id,
                from: snapshot,
                to: final_snapshot
            }
        );
    }

    #[test]
    fn superblock_success() {
        // insert two apcs with different length and priority
        // the superblock (longer block) apc has higher priority and succeeds so it should be picked
        let low_priority = a(3).p(1);
        let high_priority = a(4).p(2);
        let mut vm = TestVm::new([low_priority, high_priority]);
        let low_priority_id = 0;
        let high_priority_id = 1;
        let snapshot: TestSnapshot = s(0);
        // The final snapshot is the one at the end of the high priority apc, since it succeeds
        let final_snapshot = s(4);
        vm.try_add_candidate(low_priority_id).unwrap();
        vm.try_add_candidate(high_priority_id).unwrap();
        assert!(vm.incr().is_empty());
        assert!(vm.incr().is_empty());
        // Both are still running
        assert_eq!(vm.count_done(), 0);
        assert!(vm.incr().is_empty());
        // The first apc is done
        assert_eq!(vm.count_done(), 1);
        let output = vm.incr();
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id: high_priority_id,
                from: snapshot,
                to: final_snapshot,
            }
        );
    }

    #[test]
    fn superblock_failure() {
        // insert two apcs with different length and priority
        // the superblock (longer block) apc has higher priority but fails the branching condition, so the low priority apc should be picked
        let low_priority = a(3).p(1);
        let failing_constraints =
            OptimisticConstraints::from_constraints(vec![OptimisticConstraint {
                left: OptimisticExpression::Literal(OptimisticLiteral {
                    instr_idx: 3,
                    val: crate::execution::LocalOptimisticLiteral::Pc,
                }),
                right: OptimisticExpression::Number(1234),
            }]);
        let high_priority = a(4).p(2).c(failing_constraints);
        let mut vm = TestVm::new([low_priority, high_priority]);
        let low_priority_id = 0;
        let high_priority_id = 1;
        let snapshot: TestSnapshot = s(0);
        // The final snapshot is the one at the end of the low priority apc, as the other one failed
        let final_snapshot = s(3);
        vm.try_add_candidate(low_priority_id).unwrap();
        // The high priority candidate requires a jump to pc 1234 for the last cycle. This means the pc at step 3 (before instruction 4) should be 1234.
        vm.try_add_candidate(high_priority_id).unwrap();
        assert!(vm.incr().is_empty());
        assert!(vm.incr().is_empty());
        // Both apcs are still running
        assert_eq!(vm.count_done(), 0);
        // In this check, the low priority apc completes and the high priority one fails (as the jump did not happen)
        let output = vm.incr();
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id: low_priority_id,
                from: snapshot,
                to: final_snapshot,
            }
        );
    }

    #[test]
    fn superblock_failure_keeps_non_overlapping_calls() {
        // A and B are separate blocks; ABC spans A+B+C but fails within C.
        // When ABC fails, A and B should both be emitted since their ranges do not overlap.
        let a_len = 2;
        let b_len = 2;
        let c_len = 2;
        let abc_len = a_len + b_len + c_len;
        let fail_instr_idx = a_len + b_len + 1;
        let failing_constraints =
            OptimisticConstraints::from_constraints(vec![OptimisticConstraint {
                left: OptimisticExpression::Literal(OptimisticLiteral {
                    instr_idx: fail_instr_idx,
                    val: crate::execution::LocalOptimisticLiteral::Pc,
                }),
                right: OptimisticExpression::Number(999),
            }]);
        let apc_a = a(a_len).p(1);
        let apc_b = a(b_len).p(1);
        let apc_abc = a(abc_len).p(2).c(failing_constraints);
        let mut vm = TestVm::new([apc_a, apc_b, apc_abc]);
        let apc_a_id = 0;
        let apc_b_id = 1;
        let apc_abc_id = 2;
        vm.try_add_candidate(apc_a_id).unwrap();
        vm.try_add_candidate(apc_abc_id).unwrap();
        for _ in 0..a_len {
            assert!(vm.incr().is_empty());
        }
        vm.try_add_candidate(apc_b_id).unwrap();
        for _ in 0..b_len {
            assert!(vm.incr().is_empty());
        }
        let output = vm.incr();
        assert_eq!(
            output,
            vec![
                ApcCall {
                    apc_id: apc_a_id,
                    from: s(0),
                    to: s(2),
                },
                ApcCall {
                    apc_id: apc_b_id,
                    from: s(2),
                    to: s(4),
                },
            ]
        );
    }

    #[test]
    fn two_candidates_different_start() {
        // define two apcs with different priorities
        let low_priority = a(3).p(1);
        let high_priority = a(3).p(2);
        let mut vm = TestVm::new([low_priority, high_priority]);
        let low_priority_id = 0;
        let high_priority_id = 1;
        let high_priority_snapshot = s(1);
        let final_snapshot = s(4);
        // insert the low priority apc
        vm.try_add_candidate(low_priority_id).unwrap();
        assert!(vm.incr().is_empty());
        // candidate is still running
        assert_eq!(vm.count_in_progress(), 1);
        // insert the high priority apc
        vm.try_add_candidate(high_priority_id).unwrap();
        assert!(vm.incr().is_empty());
        // Both are still running
        assert_eq!(vm.count_in_progress(), 2);
        assert!(vm.incr().is_empty());
        // The first apc is done
        assert_eq!(vm.count_done(), 1);
        let output = vm.incr();
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id: high_priority_id,
                from: high_priority_snapshot,
                to: final_snapshot,
            }
        );
    }

    #[test]
    fn abort_in_progress_returns_shorter_candidate() {
        let short_low_priority = a(2).p(1);
        let long_high_priority = a(4).p(2);
        let mut vm = TestVm::new([short_low_priority, long_high_priority]);
        let short_low_priority_id = 0;
        let short_snapshot = s(0);
        let short_final_snapshot = s(2);
        vm.try_add_candidate(short_low_priority_id).unwrap();
        vm.try_add_candidate(1).unwrap();
        for _ in 0..2 {
            assert!(vm.incr().is_empty());
        }
        assert_eq!(vm.count_done(), 1);
        assert_eq!(vm.count_in_progress(), 1);
        vm.candidates.abort_in_progress();
        let extracted = vm.candidates.extract_calls();
        assert_eq!(extracted.len(), 1);
        assert_eq!(
            extracted[0],
            ApcCall {
                apc_id: short_low_priority_id,
                from: short_snapshot,
                to: short_final_snapshot,
            }
        );
    }

    #[test]
    fn abort_in_progress_after_segment_end_picks_shorter_candidate() {
        let short_low_priority = a(2).p(1);
        let long_high_priority = a(4).p(2);
        let mut vm = TestVm::new([short_low_priority, long_high_priority]);
        let short_low_priority_id = 0;
        let short_snapshot = s(0);
        let short_final_snapshot = s(2);
        vm.try_add_candidate(short_low_priority_id).unwrap();
        vm.try_add_candidate(1).unwrap();
        for _ in 0..2 {
            assert!(vm.incr().is_empty());
        }
        // The short one is done, the long one is still in progress
        assert_eq!(vm.count_done(), 1);
        assert_eq!(vm.count_in_progress(), 1);
        // Segment ends, abort the one in progress
        vm.candidates.abort_in_progress();
        // The extracted one should be the short one
        let extracted = vm.candidates.extract_calls();
        assert_eq!(extracted.len(), 1);
        assert_eq!(
            extracted[0],
            ApcCall {
                apc_id: short_low_priority_id,
                from: short_snapshot,
                to: short_final_snapshot,
            }
        );
    }

    #[test]
    fn jump_back_and_readd_candidate_does_not_overlap() {
        // We have a program like
        // 0: NOOP
        // 1: JUMP 0
        // We create an apc for the range, and check that calls do not overlap: the first call finishes before the second call starts
        let mut vm = TestVm::new([a(2).p(1)]);
        let apc_id = 0;
        // pc = 0, add the candidate
        vm.try_add_candidate(apc_id).unwrap();
        assert_eq!(vm.count_in_progress(), 1);
        assert!(vm.incr().is_empty());
        // pc = 1, candidate still in progress
        let output = vm.jump(0);
        // pc = 0, first candidate should be done
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id,
                from: s(0),
                to: s(2),
            }
        );
        // done with the first call, haven't started the second call, clean state.
        assert_eq!(vm.count_in_progress(), 0);
        assert_eq!(vm.count_done(), 0);
        // start over
        vm.try_add_candidate(apc_id).unwrap();
        assert_eq!(vm.count_in_progress(), 1);
        assert!(vm.incr().is_empty());
        let output = vm.jump(0);
        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            ApcCall {
                apc_id,
                from: s(2),
                to: s(4),
            }
        );
    }
}

================================================
FILE: autoprecompiles/src/execution/evaluator.rs
================================================
use std::collections::HashMap;

use itertools::Itertools;
use serde::{Deserialize, Serialize};

use crate::{
    execution::{
        ast::{
            LocalOptimisticLiteral, OptimisticConstraint, OptimisticExpression, OptimisticLiteral,
        },
        ExecutionState, Fetch, LocalFetch,
    },
    powdr::UniqueReferences,
};

/// A collection of optimistic constraints over the intermediate execution states of a block, to be accessed in chronological order
// NOTE(review): this struct continues beyond the end of this chunk; the field
// comment below is cut off mid-sentence in the extracted text.
#[derive(Debug, Serialize, Deserialize, deepsize2::DeepSizeOf, PartialEq, Clone, Default)]
pub struct OptimisticConstraints {
    /// For each step, the execution values we
need to remember for future constraints, excluding this step fetches_by_step: HashMap>>, /// For each step, the constraints that must be satisfied constraints_to_check_by_step: HashMap>>, } impl OptimisticConstraints { pub fn empty() -> Self { Self { fetches_by_step: Default::default(), constraints_to_check_by_step: Default::default(), } } } impl OptimisticConstraints { pub fn from_constraints(constraints: Vec>) -> Self { // Extract each constraint together with the literals it references and the step // at which the constraint becomes evaluable (i.e. when all referenced literals // are available). let constraint_data = constraints .into_iter() .map(|constraint| { let references: Vec<_> = constraint.unique_references().collect(); let first_evaluable_step = references .iter() .map(|r| r.instr_idx) .max() .unwrap_or_default(); (first_evaluable_step, references, constraint) }) .collect_vec(); // For every literal that is referenced in a *future* step, schedule a fetch at // the step in which it first appears so it can be cached for later comparisons. let fetches_by_step = constraint_data .iter() .flat_map(|(constraint_step, references, _)| { references .iter() .filter(move |literal| *constraint_step > literal.instr_idx) .map(|literal| (literal.instr_idx, literal.val.into())) }) .into_group_map() .into_iter() .sorted_by_key(|(instruction_index, _)| *instruction_index) .collect(); // The constraint itself can only be checked once all its literals exist. 
let constraints_to_check_by_step = constraint_data .into_iter() .map(|(first_evaluable_step, _, constraint)| (first_evaluable_step, constraint)) .into_group_map() .into_iter() .sorted_by_key(|(instruction_index, _)| *instruction_index) .collect(); Self { fetches_by_step, constraints_to_check_by_step, } } } /// An evaluator over a set of constraints /// The expected use is to /// - store the APC's set of optimistic constraints in the program /// - when an APC is executed, create an instance of this evaluator over the APC's optimistic constraints /// - as we go through the original instructions, call `OptimisticConstraintEvaluator::try_next_execution_step` /// - if a constraint fails, stop checking the constraints #[derive(Debug)] pub struct OptimisticConstraintEvaluator { /// The current instruction index in the execution instruction_index: usize, /// The values from previous intermediate states which we still need memory: HashMap, V>, } #[derive(Debug)] pub struct OptimisticConstraintFailed; impl Default for OptimisticConstraintEvaluator { fn default() -> Self { Self::new() } } impl OptimisticConstraintEvaluator { pub fn new() -> Self { Self { instruction_index: 0, memory: HashMap::default(), } } pub fn instruction_index(&self) -> usize { self.instruction_index } /// Check all constraints that can be checked at this stage, returning a new instance iff they are verified pub fn try_next_execution_step( &mut self, state: &E, optimistic_constraints: &OptimisticConstraints, ) -> Result<(), OptimisticConstraintFailed> where E: ExecutionState, A: std::hash::Hash + PartialEq + Eq + Copy, V: Copy, { // Get the constraints that can first be checked at this step let constraints = optimistic_constraints .constraints_to_check_by_step .get(&self.instruction_index); if let Some(constraints) = constraints { // Check the constraints based on the current state and the memory of the previous states let evaluator = StepOptimisticConstraintEvaluator::new(self.instruction_index, state, 
&self.memory); if !constraints .iter() .all(|constraint| evaluator.evaluate_constraint(constraint)) { return Err(OptimisticConstraintFailed); } } // Get the values we need to store from the state to check constraints in the future let fetches = optimistic_constraints .fetches_by_step .get(&self.instruction_index); if let Some(fetches) = fetches { // fetch the values them in memory for fetch in fetches { let value = fetch.get(state); let key = Fetch { instr_idx: self.instruction_index, val: *fetch, }; self.memory.insert(key, value); } } self.instruction_index += 1; Ok(()) } } /// A constraint evaluator using the current execution state as well as the memory of previous states struct StepOptimisticConstraintEvaluator<'a, E: ExecutionState> { step: usize, state: &'a E, memory: &'a HashMap, E::Value>, } impl<'a, E: ExecutionState> StepOptimisticConstraintEvaluator<'a, E> { fn new( step: usize, state: &'a E, memory: &'a HashMap< Fetch<::RegisterAddress>, ::Value, >, ) -> Self { Self { step, memory, state, } } } impl<'a, E: ExecutionState> StepOptimisticConstraintEvaluator<'a, E> { fn evaluate_constraint(&self, c: &OptimisticConstraint) -> bool { self.evaluate_expression(&c.left) == self.evaluate_expression(&c.right) } fn evaluate_expression( &self, e: &OptimisticExpression, ) -> E::Value { match e { OptimisticExpression::Number(v) => *v, OptimisticExpression::Literal(optimistic_literal) => { self.evaluate_literal(optimistic_literal) } } } fn evaluate_literal(&self, l: &OptimisticLiteral) -> E::Value { // By construction, the literals involved should only be from past states or the current state debug_assert!(l.instr_idx <= self.step); let fetch_value = self.fetch(&(*l).into()); match l.val { LocalOptimisticLiteral::RegisterLimb(_, limb_index) => { E::value_limb(fetch_value, limb_index) } LocalOptimisticLiteral::Pc => fetch_value, } } fn fetch(&self, f: &Fetch) -> E::Value { if f.instr_idx == self.step { // Hit the state for the current step f.val.get(self.state) } else 
{ // Hit the memory for the previous steps self.memory[f] } } } #[cfg(test)] mod tests { use super::*; struct TestExecutionState { mem: [u8; 2], pc: u8, } impl ExecutionState for TestExecutionState { type RegisterAddress = u8; type Value = u8; fn pc(&self) -> Self::Value { self.pc } fn reg(&self, address: &Self::RegisterAddress) -> Self::Value { self.mem[*address as usize] } fn value_limb(value: Self::Value, limb_index: usize) -> Self::Value { value >> (limb_index * LIMB_WIDTH) & (!0u8 >> (8 - LIMB_WIDTH)) } fn global_clk(&self) -> usize { todo!() } } // An execution state with a single limb of 8 bits type SingleLimbExecutionState = TestExecutionState<8>; fn literal(instr_idx: usize, val: LocalOptimisticLiteral) -> OptimisticLiteral { OptimisticLiteral { instr_idx, val } } fn literal_expr( instr_idx: usize, val: LocalOptimisticLiteral, ) -> OptimisticExpression { OptimisticExpression::Literal(literal(instr_idx, val)) } // This is used in the cases where the value has a single limb, so we access the first limb fn mem(instr_idx: usize, addr: u8) -> OptimisticExpression { mem_limb(instr_idx, addr, 0) } fn mem_limb(instr_idx: usize, addr: u8, limb_index: usize) -> OptimisticExpression { literal_expr( instr_idx, LocalOptimisticLiteral::RegisterLimb(addr, limb_index), ) } fn pc(instr_idx: usize) -> OptimisticExpression { literal_expr(instr_idx, LocalOptimisticLiteral::Pc) } fn value(value: u8) -> OptimisticExpression { OptimisticExpression::Number(value) } fn eq( left: OptimisticExpression, right: OptimisticExpression, ) -> OptimisticConstraint { OptimisticConstraint { left, right } } fn equality_constraints() -> OptimisticConstraints { OptimisticConstraints::from_constraints(vec![ eq(mem(0, 0), mem(0, 1)), eq(mem(1, 0), mem(1, 1)), eq(mem(2, 0), mem(2, 1)), ]) } fn cross_step_memory_constraint() -> OptimisticConstraints { OptimisticConstraints::from_constraints(vec![eq(mem(0, 0), mem(1, 1))]) } fn cross_step_pc_constraint() -> OptimisticConstraints { 
OptimisticConstraints::from_constraints(vec![eq(pc(0), pc(1))]) } fn initial_to_final_constraint(final_instr_idx: usize) -> OptimisticConstraints { OptimisticConstraints::from_constraints(vec![eq(mem(0, 0), mem(final_instr_idx, 1))]) } #[test] fn constraints_succeed_when_all_states_match() { let evaluator = OptimisticConstraintEvaluator::new(); let states = [ SingleLimbExecutionState { mem: [0, 0], pc: 0 }, SingleLimbExecutionState { mem: [1, 1], pc: 1 }, SingleLimbExecutionState { mem: [2, 2], pc: 2 }, ]; let res = states.iter().try_fold(evaluator, |mut evaluator, state| { evaluator .try_next_execution_step(state, &equality_constraints()) .map(|_| evaluator) }); assert!(res.is_ok()); } #[test] fn checks_equality_constraints() { let mut evaluator = OptimisticConstraintEvaluator::new(); let states = [ (SingleLimbExecutionState { mem: [0, 0], pc: 0 }, true), (SingleLimbExecutionState { mem: [1, 1], pc: 1 }, true), (SingleLimbExecutionState { mem: [2, 0], pc: 2 }, false), ]; for (state, should_succeed) in &states { assert_eq!( evaluator .try_next_execution_step(state, &equality_constraints()) .is_ok(), *should_succeed ); } } #[test] fn reuses_values_from_previous_steps() { let constraints = cross_step_memory_constraint(); let mut evaluator = OptimisticConstraintEvaluator::new(); let first_state = SingleLimbExecutionState { mem: [5, 0], pc: 0 }; evaluator .try_next_execution_step(&first_state, &constraints) .unwrap(); let second_state = SingleLimbExecutionState { mem: [0, 5], pc: 1 }; assert!(evaluator .try_next_execution_step(&second_state, &constraints) .is_ok()); } #[test] fn detects_mismatch_for_stored_values() { let constraints = cross_step_memory_constraint(); let mut evaluator = OptimisticConstraintEvaluator::new(); let first_state = SingleLimbExecutionState { mem: [9, 0], pc: 0 }; evaluator .try_next_execution_step(&first_state, &constraints) .unwrap(); let second_state = SingleLimbExecutionState { mem: [0, 3], pc: 1 }; assert!(evaluator 
.try_next_execution_step(&second_state, &constraints) .is_err()); } #[test] fn compares_program_counter_across_steps() { let constraints = cross_step_pc_constraint(); let mut evaluator = OptimisticConstraintEvaluator::new(); let first_state = SingleLimbExecutionState { mem: [0; 2], pc: 7 }; evaluator .try_next_execution_step(&first_state, &constraints) .unwrap(); let second_state = SingleLimbExecutionState { mem: [0; 2], pc: 7 }; assert!(evaluator .try_next_execution_step(&second_state, &constraints) .is_ok()); let mut failing_evaluator = OptimisticConstraintEvaluator::new(); failing_evaluator .try_next_execution_step(&first_state, &constraints) .unwrap(); let mismatched_pc = SingleLimbExecutionState { mem: [0; 2], pc: 8 }; assert!(failing_evaluator .try_next_execution_step(&mismatched_pc, &constraints) .is_err()); } #[test] fn links_initial_and_final_state() { let final_step = 2; let constraints = initial_to_final_constraint(final_step); let mut evaluator = OptimisticConstraintEvaluator::new(); let initial_state = SingleLimbExecutionState { mem: [11, 0], pc: 0, }; evaluator .try_next_execution_step(&initial_state, &constraints) .unwrap(); let middle_state = SingleLimbExecutionState { mem: [0; 2], pc: 1 }; evaluator .try_next_execution_step(&middle_state, &constraints) .unwrap(); let final_state = SingleLimbExecutionState { mem: [0, 11], pc: 2, }; assert!(evaluator .try_next_execution_step(&final_state, &constraints) .is_ok()); let mut failing_evaluator = OptimisticConstraintEvaluator::new(); failing_evaluator .try_next_execution_step(&initial_state, &constraints) .unwrap(); failing_evaluator .try_next_execution_step(&middle_state, &constraints) .unwrap(); let mismatched_final_state = SingleLimbExecutionState { mem: [0, 3], pc: 2 }; assert!(failing_evaluator .try_next_execution_step(&mismatched_final_state, &constraints) .is_err()); } #[test] fn compares_memory_to_literal_value() { let constraints = OptimisticConstraints::from_constraints(vec![eq(mem(0, 0), 
value(99))]); let mut evaluator = OptimisticConstraintEvaluator::new(); let passing_state = SingleLimbExecutionState { mem: [99, 0], pc: 0, }; assert!(evaluator .try_next_execution_step(&passing_state, &constraints) .is_ok()); let failing_constraints = OptimisticConstraints::from_constraints(vec![eq(mem(0, 0), value(10))]); let mut failing_evaluator = OptimisticConstraintEvaluator::new(); let failing_state = SingleLimbExecutionState { mem: [12, 0], pc: 0, }; assert!(failing_evaluator .try_next_execution_step(&failing_state, &failing_constraints) .is_err()); } #[test] fn accesses_register_limbs() { let constraints = OptimisticConstraints::from_constraints(vec![ eq(mem_limb(0, 0, 0), value(0b10)), eq(mem_limb(0, 0, 1), value(0b01)), eq(mem_limb(0, 0, 2), value(0b11)), eq(mem_limb(0, 0, 3), value(0b10)), ]); let mut evaluator = OptimisticConstraintEvaluator::new(); // We use an execution state where each limb is two bits, so 4 limbs in total let state = TestExecutionState::<2> { mem: [0b1011_0110, 0], pc: 0, }; assert!(evaluator .try_next_execution_step(&state, &constraints) .is_ok()); } } ================================================ FILE: autoprecompiles/src/execution/mod.rs ================================================ use serde::{Deserialize, Serialize}; mod ast; mod candidates; mod evaluator; pub use ast::*; pub use candidates::{Apc, ApcCall, ApcCandidates}; pub use evaluator::{OptimisticConstraintEvaluator, OptimisticConstraints}; pub trait ExecutionState { type RegisterAddress: PartialEq + Eq + std::hash::Hash + Clone + Copy + std::fmt::Debug + Serialize + for<'a> Deserialize<'a> + Send + Sync; type Value: PartialEq + TryFrom + Eq + std::fmt::Debug + Serialize + for<'a> Deserialize<'a> + Clone + Copy + Send + Sync; /// Return the pc at this point fn pc(&self) -> Self::Value; fn value_limb(value: Self::Value, limb_index: usize) -> Self::Value; /// Read a register at this point fn reg(&self, address: &Self::RegisterAddress) -> Self::Value; /// Return the 
value of a the clock. The returned value must be strictly increasing within this execution. fn global_clk(&self) -> usize; } ================================================ FILE: autoprecompiles/src/execution_profile.rs ================================================ use crate::adapter::Adapter; use crate::blocks::Program; use std::collections::HashMap; use std::sync::Arc; use std::sync::Mutex; use tracing::dispatcher::Dispatch; use tracing::field::Field as TracingField; use tracing::{Event, Level, Subscriber}; use tracing_subscriber::{ layer::Context, prelude::*, registry::{LookupSpan, Registry}, Layer, }; #[derive(Clone)] /// Program execution information for PGO pub struct ExecutionProfile { /// execution count of each pc pub pc_count: HashMap, /// list of pcs executed in order pub pc_list: Vec, } /// Produces information about the program's execution for PGO. /// Used in Pgo::Cell and Pgo::Instruction to help rank basic blocks to create APCs for. pub fn execution_profile( program: &A::Program, execute_fn: impl FnOnce(), ) -> ExecutionProfile { // in memory collector storage let collector = PgoCollector::new(); // build subscriber let subscriber = Registry::default().with(collector.clone()); // dispatch constructs a local subscriber at trace level that is invoked during data collection but doesn't override the global one at info level let dispatch = Dispatch::new(subscriber); tracing::dispatcher::with_default(&dispatch, execute_fn); let pc_list = collector.take_pc_list(); // Extract the collected data let pc_count = pc_list.iter().fold(HashMap::new(), |mut counts, pc| { *counts.entry(*pc).or_insert(0) += 1; counts }); // the smallest pc is the same as the base_pc if there's no stdin let pc_min = pc_count.keys().min().unwrap(); tracing::debug!("pc_min: {}; base_pc: {}", pc_min, program.base_pc()); // print the total and by pc counts tracing::debug!("Pgo captured {} pc's", pc_count.len()); if tracing::enabled!(Level::TRACE) { // print pc_index map in descending 
order of pc_index count let mut pc_index_count_sorted: Vec<_> = pc_count.iter().collect(); pc_index_count_sorted.sort_by(|a, b| b.1.cmp(a.1)); pc_index_count_sorted.iter().for_each(|(pc, count)| { tracing::trace!("pc_index {}: {}", pc, count); }); } ExecutionProfile { pc_count, pc_list } } // holds basic type fields of execution objects captured in trace by subscriber #[derive(Default)] struct PgoData { pc: Option, } impl tracing::field::Visit for PgoData { // when we receive a u64 field, they are parsed into fields of the pgo data fn record_u64(&mut self, field: &tracing::field::Field, value: u64) { if field.name() == "pc" { self.pc = Some(value); } } // required for implementation, but in practice we will only receive u64 fields // the fields we receive are determined by the instruction trace print out of our openvm fork during execution fn record_debug(&mut self, _: &TracingField, _: &dyn std::fmt::Debug) {} } // A Layer that collects data we are interested in using for the pgo from the trace fields. 
#[derive(Clone)] struct PgoCollector { pc_list: Arc>>, } impl PgoCollector { fn new() -> Self { Self { pc_list: Arc::new(Mutex::new(Vec::new())), } } fn increment(&self, pc: u64) { self.pc_list.lock().unwrap().push(pc); } fn take_pc_list(&self) -> Vec { std::mem::take(&mut self.pc_list.lock().unwrap()) } } impl Layer for PgoCollector where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { // build a visitor to parse and hold trace fields we are interested in let mut visitor = PgoData::default(); event.record(&mut visitor); // because our subscriber is at the trace level, for trace print outs that don't match PgoData, // the visitor can't parse them, and these cases are filtered out automatically if let Some(pc) = visitor.pc { self.increment(pc); } } } ================================================ FILE: autoprecompiles/src/export.rs ================================================ use std::{ fmt::Display, io::{BufWriter, Write}, path::PathBuf, }; use itertools::Itertools; use powdr_constraint_solver::constraint_system::ConstraintSystem; use powdr_number::FieldElement; use serde::{Deserialize, Serialize}; use crate::{ adapter::{Adapter, AdapterApcOverPowdrField, AdapterOptimisticConstraints}, blocks::{Instruction, PcStep, SuperBlock}, bus_map::BusMap, execution::ExecutionState, expression::AlgebraicReference, symbolic_machine::constraint_system_to_symbolic_machine, Apc, ColumnAllocator, SymbolicMachine, }; /// Configuration for exporting the state of the autoprecompile /// generation and optimization as json at different stages. #[derive(Default)] pub struct ExportOptions { pub path: Option, pub level: ExportLevel, sequence_number: usize, substituted_variables: Vec, } #[derive(Default)] pub enum ExportLevel { /// Export the unoptimized and optimized autoprecompile. #[default] OnlyAPC, /// In addition to the above, also export the state at each /// optimization loop iteration. 
APCAndOptimizerLoop, /// In addition to the above, also export the state at each /// optimization step. APCAndOptimizerSteps, } impl ExportOptions { /// Creates a new export options instance. Does not export anything unless /// a path is given. `path` is a path to a file name prefix. /// During export, a sequence number and an extension will be appended. pub fn new(path: Option, start_pcs: &[u64], level: ExportLevel) -> Self { ExportOptions { path: path.map(|p| p.join(format!("apc_candidate_{}", start_pcs.iter().join("_")))), level, sequence_number: 0, substituted_variables: Vec::new(), } } /// Constructs export options from environment variables. pub fn from_env_vars( export_path: Option, export_level: Option, start_pcs: &[u64], ) -> Self { let path = export_path.map(PathBuf::from); let level = match export_level.as_deref() { Some("1") => ExportLevel::OnlyAPC, Some("2") => ExportLevel::APCAndOptimizerLoop, Some("3") => ExportLevel::APCAndOptimizerSteps, _ => ExportLevel::OnlyAPC, }; ExportOptions::new(path, start_pcs, level) } pub fn export_requested(&self) -> bool { self.path.is_some() } pub fn export_apc( &mut self, apc: &AdapterApcOverPowdrField, suffix: Option<&str>, bus_map: &BusMap, ) { let apc = instructions_to_powdr_field::(apc.clone()); let path = self.write_to_next_file(&ApcWithBusMap { apc: &apc, bus_map }, suffix); // For debugging, also serialize a human-readable version of the final precompile let rendered = apc.machine.render(bus_map); let path = path.with_file_name(format!( "{}.txt", path.file_stem().unwrap().to_string_lossy() )); std::fs::write(path, rendered).unwrap(); } pub fn export_apc_from_machine( &mut self, block: SuperBlock, machine: SymbolicMachine, column_allocator: &ColumnAllocator, bus_map: &BusMap, suffix: Option<&str>, ) { assert!(self.export_requested()); let apc = Apc::new( block, machine, AdapterOptimisticConstraints::::empty(), column_allocator, ); self.export_apc::(&apc, suffix, bus_map); } pub fn export_optimizer_outer(&mut 
self, data: &impl serde::Serialize, suffix: &str) { match self.level { ExportLevel::APCAndOptimizerLoop | ExportLevel::APCAndOptimizerSteps => { self.write_to_next_file(data, Some(suffix)); } _ => {} } } pub fn export_optimizer_outer_constraint_system( &mut self, constraint_system: &ConstraintSystem, suffix: &str, ) { match self.level { ExportLevel::APCAndOptimizerLoop | ExportLevel::APCAndOptimizerSteps => { let machine = constraint_system_to_symbolic_machine(constraint_system.clone()); self.write_to_next_file(&machine, Some(suffix)); } _ => {} } } pub fn export_optimizer_inner(&mut self, data: &impl serde::Serialize, suffix: &str) { if let ExportLevel::APCAndOptimizerSteps = self.level { self.write_to_next_file(data, Some(suffix)); } } pub fn export_optimizer_inner_constraint_system( &mut self, constraint_system: &ConstraintSystem, suffix: &str, ) where T: FieldElement, V: Ord + Clone + serde::Serialize, { if let ExportLevel::APCAndOptimizerSteps = self.level { self.write_to_next_file(&constraint_system, Some(suffix)); } } /// Registers a sequence of variables that have been substituted during optimization, /// so that they can be exported together with the final export. pub fn register_substituted_variables( &mut self, vars: impl IntoIterator, ) where Var: serde::Serialize, Expr: serde::Serialize, { if self.export_requested() { self.substituted_variables.extend( vars.into_iter() .map(|(v, e)| serde_json::to_string(&(v, e)).unwrap()), ); } } /// Exports the registered substituted variables to a separate json file. pub fn export_substituted_variables(&mut self) { if self.export_requested() { let path = self.path.clone().unwrap(); let file_stub = path.file_name().unwrap().to_string_lossy(); let path = path.with_file_name(format!("{file_stub}_substitutions.json")); let mut writer = create_full_path(&path); write!(&mut writer, "[{}]", self.substituted_variables.join(",")).unwrap(); writer.flush().unwrap(); } } /// Path to the next file to export to. 
Uses an increasing sequence number /// and also adds the `info` into the file name. fn next_path(&mut self, info: Option<&str>) -> PathBuf { let seq = self.sequence_number; self.sequence_number += 1; let path = self.path.clone().unwrap(); let file_stub = path.file_name().unwrap().to_string_lossy(); path.with_file_name(format!( "{file_stub}_{seq:03}{}.json", info.map(|i| format!("_{i}")).unwrap_or_default(), )) } fn write_to_next_file(&mut self, data: &impl serde::Serialize, info: Option<&str>) -> PathBuf { let path = self.next_path(info); self.write_to_file(data, path.clone()); path } fn write_to_file(&mut self, data: &impl serde::Serialize, path: PathBuf) { let mut writer = create_full_path(&path); serde_json::to_writer(&mut writer, data).unwrap(); writer.flush().unwrap(); } } fn create_full_path(path: &PathBuf) -> BufWriter { if let Some(parent) = path.parent() { std::fs::create_dir_all(parent).unwrap(); } BufWriter::new(std::fs::File::create(path).unwrap()) } /// Converts the APC to use an instruction type that stores field elements /// using a powdr type, so that we do not need to export in Montgomery form. #[allow(clippy::type_complexity)] fn instructions_to_powdr_field( apc: AdapterApcOverPowdrField, ) -> Apc< ::PowdrField, SimpleInstruction<::PowdrField>, <::ExecutionState as ExecutionState>::RegisterAddress, <::ExecutionState as ExecutionState>::Value, > { let block = apc.block.map_instructions(|instr| { SimpleInstruction( // Extract the data by providing a dummy pc // and removing it again. instr .pc_lookup_row(778) .iter() .skip(1) .map(|x| A::from_field(x.clone())) .collect(), ) }); Apc { block, machine: apc.machine, subs: apc.subs, optimistic_constraints: apc.optimistic_constraints, } } /// Dummy instruction type that is used to store the converted field type. 
#[derive(Serialize, Deserialize, Clone)] pub struct SimpleInstruction(Vec); impl Display for SimpleInstruction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.0.iter().format(", ")) } } impl Instruction for SimpleInstruction { fn pc_lookup_row(&self, _pc: u64) -> Vec { self.0.clone() } } impl PcStep for SimpleInstruction { fn pc_step() -> u32 { unimplemented!() } } #[derive(Serialize, Deserialize)] pub struct ApcWithBusMap { #[serde(flatten)] pub apc: Apc, pub bus_map: BusMap, } ================================================ FILE: autoprecompiles/src/expression.rs ================================================ //! In this module, we instantiate `powdr_expression::AlgebraicExpression` using a //! custom `AlgebraicReference` type. use core::ops::{Add, Mul, Neg, Sub}; use powdr_number::ExpressionConvertible; use serde::{Deserialize, Serialize}; use std::{collections::BTreeMap, hash::Hash, marker::PhantomData, sync::Arc}; use crate::symbolic_machine::{SymbolicBusInteraction, SymbolicConstraint}; pub type AlgebraicExpression = powdr_expression::AlgebraicExpression; #[derive(Debug, Clone, Eq)] pub struct AlgebraicReference { /// Name of the polynomial - just for informational purposes. /// Comparisons are based on the ID. pub name: Arc, /// Identifier for a reference. 
pub id: u64, } impl std::fmt::Display for AlgebraicReference { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name) } } impl PartialOrd for AlgebraicReference { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for AlgebraicReference { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.id.cmp(&other.id) } } impl PartialEq for AlgebraicReference { fn eq(&self, other: &Self) -> bool { self.id == other.id } } impl Hash for AlgebraicReference { fn hash(&self, state: &mut H) { self.id.hash(state); } } impl Serialize for AlgebraicReference { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { serializer.serialize_str(&format!("{}@{}", self.name, self.id)) } } impl<'de> Deserialize<'de> for AlgebraicReference { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let s = String::deserialize(deserializer)?; let Some(separator_pos) = s.rfind('@') else { return Err(serde::de::Error::custom(format!( "Invalid format for AlgebraicReference: {s}", ))); }; let name = Arc::new(s[..separator_pos].to_string()); let id: u64 = s[separator_pos + 1..].parse().map_err(|_| { serde::de::Error::custom(format!( "Invalid ID in AlgebraicReference: {}", &s[separator_pos + 1..] )) })?; Ok(AlgebraicReference { name, id }) } } /// Tries to convert a `powdr_expression::AlgebraicExpression` into a /// `powdr_expression::AlgebraicExpression`. 
pub fn try_convert>( expr: powdr_expression::AlgebraicExpression, ) -> Result, R::Error> { match expr { powdr_expression::AlgebraicExpression::Reference(reference) => Ok( powdr_expression::AlgebraicExpression::Reference(reference.try_into()?), ), powdr_expression::AlgebraicExpression::Number(n) => { Ok(powdr_expression::AlgebraicExpression::Number(n)) } powdr_expression::AlgebraicExpression::BinaryOperation(binary) => { Ok(powdr_expression::AlgebraicExpression::BinaryOperation( powdr_expression::AlgebraicBinaryOperation { left: Box::new(try_convert(*binary.left)?), op: binary.op, right: Box::new(try_convert(*binary.right)?), }, )) } powdr_expression::AlgebraicExpression::UnaryOperation(unary) => { Ok(powdr_expression::AlgebraicExpression::UnaryOperation( powdr_expression::AlgebraicUnaryOperation { op: unary.op, expr: Box::new(try_convert(*unary.expr)?), }, )) } } } /// Evaluate an `AlgebraicExpression` to a generic type, which for example can be an expression or a concrete value. pub trait AlgebraicEvaluator where F: Add + Sub + Mul + Neg + Copy, E: Add + Sub + Mul + Neg, { fn eval_const(&self, c: F) -> E; fn eval_var(&self, algebraic_var: &AlgebraicReference) -> E; fn eval_expr(&self, algebraic_expr: &AlgebraicExpression) -> E { algebraic_expr.to_expression(&|n| self.eval_const(*n), &|var| self.eval_var(var)) } fn eval_bus_interaction<'a, 'b>( &'a self, bus_interaction: &'b SymbolicBusInteraction, ) -> ConcreteBusInteraction + 'b> where 'a: 'b, { let mult = self.eval_expr(&bus_interaction.mult); let args = bus_interaction.args.iter().map(|arg| self.eval_expr(arg)); ConcreteBusInteraction { id: bus_interaction.id, mult, args, } } fn eval_constraint(&self, constraint: &SymbolicConstraint) -> ConcreteConstraint { ConcreteConstraint { expr: self.eval_expr(&constraint.expr), } } } /// Evaluates an `AlgebraicExpression` to a concrete value by subsituting the polynomial references by known values. 
// NOTE(review): generic parameters in this region were lost in the source
// dump; reconstructed here — confirm against upstream.
pub struct RowEvaluator<'a, F>
where
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Copy,
{
    /// The row of values, indexed directly by `AlgebraicReference::id`.
    pub row: &'a [F],
}

impl<'a, F> RowEvaluator<'a, F>
where
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Copy,
{
    pub fn new(row: &'a [F]) -> Self {
        Self { row }
    }
}

impl<F> AlgebraicEvaluator<F, F> for RowEvaluator<'_, F>
where
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Copy,
{
    fn eval_const(&self, c: F) -> F {
        c
    }

    fn eval_var(&self, algebraic_var: &AlgebraicReference) -> F {
        // The reference id is used as a direct index into the row;
        // panics if the id is out of bounds.
        self.row[algebraic_var.id as usize]
    }
}

/// Evaluates an `AlgebraicExpression` to a concrete value by substituting the
/// polynomial references by known values where a known value is looked up via
/// a column index mapping.
pub struct MappingRowEvaluator<'a, F>
where
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Copy,
{
    pub row: &'a [F],
    /// Maps a witness column id to its index in `row`.
    pub witness_id_to_index: &'a BTreeMap<u64, usize>,
}

impl<'a, F> MappingRowEvaluator<'a, F>
where
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Copy,
{
    pub fn new(row: &'a [F], witness_id_to_index: &'a BTreeMap<u64, usize>) -> Self {
        Self {
            row,
            witness_id_to_index,
        }
    }
}

impl<F> AlgebraicEvaluator<F, F> for MappingRowEvaluator<'_, F>
where
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Copy,
{
    fn eval_const(&self, c: F) -> F {
        c
    }

    fn eval_var(&self, algebraic_var: &AlgebraicReference) -> F {
        // Panics if the id is unknown to the mapping.
        let index = self.witness_id_to_index[&(algebraic_var.id)];
        self.row[index]
    }
}

/// A bus interaction whose multiplicity and arguments have been evaluated.
pub struct ConcreteBusInteraction<E, I> {
    pub id: u64,
    pub mult: E,
    /// Lazily evaluated arguments.
    pub args: I,
}

/// A constraint whose expression has been evaluated.
pub struct ConcreteConstraint<E> {
    pub expr: E,
}

/// Evaluates by substituting the polynomial references by known values,
/// potentially changing the expression type in the process.
pub struct WitnessEvaluator<'a, V, F, E> {
    /// Maps a column id to its known value.
    pub witness: &'a BTreeMap<u64, V>,
    _phantom: PhantomData<(F, E)>,
}

impl<'a, V, F, E> WitnessEvaluator<'a, V, F, E> {
    pub fn new(witness: &'a BTreeMap<u64, V>) -> Self {
        Self {
            witness,
            _phantom: PhantomData,
        }
    }
}

// NOTE(review): generic parameters in this region were lost in the source
// dump; reconstructed here — confirm against upstream.
impl<V, F, E> AlgebraicEvaluator<F, E> for WitnessEvaluator<'_, V, F, E>
where
    V: Into<E> + Copy,
    F: Add<Output = F> + Sub<Output = F> + Mul<Output = F> + Neg<Output = F> + Into<E> + Copy,
    E: Add<Output = E> + Sub<Output = E> + Mul<Output = E> + Neg<Output = E>,
{
    fn eval_const(&self, c: F) -> E {
        c.into()
    }

    fn eval_var(&self, algebraic_var: &AlgebraicReference) -> E {
        // Panics if the variable is not part of the witness.
        (*self.witness.get(&algebraic_var.id).unwrap()).into()
    }
}

================================================ FILE: autoprecompiles/src/expression_conversion.rs ================================================

use powdr_constraint_solver::{
    grouped_expression::{GroupedExpression, GroupedExpressionComponent},
    runtime_constant::RuntimeConstant,
};
use powdr_expression::{AlgebraicExpression, AlgebraicUnaryOperation, AlgebraicUnaryOperator};
use powdr_number::{ExpressionConvertible, FieldElement};

/// Turns an algebraic expression into a grouped expression,
/// assuming all [`AlgebraicReference`]s are unknown variables.
pub fn algebraic_to_grouped_expression<T, V>(
    expr: &AlgebraicExpression<T, V>,
) -> GroupedExpression<T, V>
where
    T: FieldElement,
    V: Ord + Clone,
{
    expr.to_expression(&|n| GroupedExpression::from_number(*n), &|reference| {
        GroupedExpression::from_unknown_variable(reference.clone())
    })
}

/// Turns a grouped expression back into an algebraic expression.
/// Tries to simplify the expression wrt negation and constant factors
/// to aid human readability.
pub fn grouped_expression_to_algebraic<T, V>(
    expr: GroupedExpression<T, V>,
) -> powdr_expression::AlgebraicExpression<T, V>
where
    T: FieldElement,
    V: Ord + Clone,
{
    // Turn the expression into a list of to-be-summed items and try to
    // simplify on the way.
    let items = expr.into_summands().filter_map(|c| match c {
        GroupedExpressionComponent::Quadratic(l, r) => {
            let l = grouped_expression_to_algebraic(l);
            let (l, l_negated) = extract_negation_if_possible(l);
            let r = grouped_expression_to_algebraic(r);
            let (r, r_negated) = extract_negation_if_possible(r);
            // Two negations cancel out; exactly one negates the product.
            Some(if l_negated == r_negated {
                l * r
            } else {
                -(l * r)
            })
        }
        GroupedExpressionComponent::Linear(v, c) => Some(if c.is_one() {
            AlgebraicExpression::Reference(v.clone())
        } else if (-c).is_one() {
            -AlgebraicExpression::Reference(v.clone())
        } else if c.is_in_lower_half() {
            AlgebraicExpression::from(c) * AlgebraicExpression::Reference(v.clone())
        } else {
            // Render coefficients in the upper half of the field as negated
            // small constants, e.g. `-3 * x` instead of `(p - 3) * x`.
            -(AlgebraicExpression::from(-c) * AlgebraicExpression::Reference(v.clone()))
        }),
        GroupedExpressionComponent::Constant(constant) => {
            // Drop known-zero constants entirely.
            (!constant.is_known_zero()).then(|| field_element_to_algebraic_expression(constant))
        }
    });
    // Now order the items by negated and non-negated.
    let mut positive = vec![];
    let mut negated = vec![];
    for item in items {
        let (item, item_negated) = extract_negation_if_possible(item);
        if item_negated {
            negated.push(item);
        } else {
            positive.push(item);
        }
    }
    let positive = positive.into_iter().reduce(|acc, item| acc + item);
    let negated = negated.into_iter().reduce(|acc, item| acc + item);
    match (positive, negated) {
        (Some(positive), Some(negated)) => positive - negated,
        (Some(positive), None) => positive,
        (None, Some(negated)) => -negated,
        (None, None) => AlgebraicExpression::from(T::zero()),
    }
}

/// Renders a field element, using negation if the value lies in the upper
/// half of the field (so e.g. `p - 3` prints as `-3`).
fn field_element_to_algebraic_expression<T, V>(v: T) -> AlgebraicExpression<T, V>
where
    T: FieldElement,
{
    if v.is_in_lower_half() {
        AlgebraicExpression::from(v)
    } else {
        -AlgebraicExpression::from(-v)
    }
}

/// If `e` is negated, returns the expression without negation and `true`,
/// otherwise returns the un-modified expression and `false`.
fn extract_negation_if_possible<T, V>(
    e: AlgebraicExpression<T, V>,
) -> (AlgebraicExpression<T, V>, bool) {
    match e {
        // Only a top-level unary minus counts as a negation.
        AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation {
            op: AlgebraicUnaryOperator::Minus,
            expr,
        }) => (*expr, true),
        _ => (e, false),
    }
}

================================================ FILE: autoprecompiles/src/lib.rs ================================================

use crate::adapter::{Adapter, AdapterApc, AdapterVmConfig};
use crate::blocks::{PcStep, SuperBlock};
use crate::bus_map::{BusMap, BusType};
use crate::empirical_constraints::{ConstraintGenerator, EmpiricalConstraints};
use crate::evaluation::AirStats;
use crate::execution::OptimisticConstraints;
use crate::export::ExportOptions;
use crate::expression_conversion::algebraic_to_grouped_expression;
use crate::optimistic::algebraic_references::BlockCellAlgebraicReferenceMapper;
use crate::optimistic::config::optimistic_precompile_config;
use crate::optimistic::execution_constraint_generator::generate_execution_constraints;
use crate::optimistic::execution_literals::optimistic_literals;
use crate::symbolic_machine::{SymbolicConstraint, SymbolicMachine};
use crate::symbolic_machine_generator::convert_apc_field_type;
use adapter::AdapterOptimisticConstraint;
use execution::{
    ExecutionState, LocalOptimisticLiteral, OptimisticConstraint, OptimisticExpression,
    OptimisticLiteral,
};
use expression::{AlgebraicExpression, AlgebraicReference};
use itertools::Itertools;
use powdr::UniqueReferences;
use powdr_constraint_solver::constraint_system::{ComputationMethod, DerivedVariable};
use powdr_expression::{
    AlgebraicBinaryOperation, AlgebraicBinaryOperator, AlgebraicUnaryOperation,
};
use serde::{Deserialize, Serialize};
use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use symbolic_machine_generator::statements_to_symbolic_machine;

use powdr_number::FieldElement;

pub mod adapter;
pub mod blocks;
pub mod bus_map;
pub mod constraint_optimizer;
pub mod empirical_constraints;
pub
mod evaluation; pub mod execution_profile; pub mod expression; pub mod expression_conversion; pub mod low_degree_bus_interaction_optimizer; pub mod memory_optimizer; pub mod optimizer; pub mod pgo; pub mod powdr; pub mod range_constraint_optimizer; mod stats_logger; pub mod symbolic_machine; pub mod symbolic_machine_generator; pub use pgo::{PgoConfig, PgoType}; pub use powdr_constraint_solver::inliner::DegreeBound; pub mod equivalence_classes; pub mod execution; pub mod export; pub mod optimistic; pub mod trace_handler; #[derive(Clone)] pub struct PowdrConfig { /// Number of autoprecompiles to generate. pub autoprecompiles: u64, /// Number of basic blocks to skip for autoprecompiles. /// This is either the largest N if no PGO, or the costliest N with PGO. pub skip_autoprecompiles: u64, /// Maximum number of basic blocks included in a superblock. /// Default of 1 means only basic blocks are considered. pub superblock_max_bb_count: u8, /// Maximum number of instructions included in an Apc. pub apc_max_instructions: u32, /// Apcs executed less than the cutoff are ignored. pub apc_exec_count_cutoff: u32, /// Max degree of constraints. pub degree_bound: DegreeBound, /// The path to the APC candidates dir, if any. pub apc_candidates_dir_path: Option, /// Whether to use optimistic precompiles. 
pub should_use_optimistic_precompiles: bool, } impl PowdrConfig { pub fn new(autoprecompiles: u64, skip_autoprecompiles: u64, degree_bound: DegreeBound) -> Self { Self { autoprecompiles, skip_autoprecompiles, // superblocks disabled by default superblock_max_bb_count: 1, apc_max_instructions: u32::MAX, apc_exec_count_cutoff: 1, degree_bound, apc_candidates_dir_path: None, should_use_optimistic_precompiles: false, } } pub fn with_superblocks( mut self, max_bb_count: u8, max_instructions: Option, exec_count_cutoff: Option, ) -> Self { assert!( max_bb_count > 0, "superblock_max_bb_count must be greater than 0" ); self.superblock_max_bb_count = max_bb_count; if let Some(max_instructions) = max_instructions { self.apc_max_instructions = max_instructions; } if let Some(exec_count_cutoff) = exec_count_cutoff { self.apc_exec_count_cutoff = exec_count_cutoff; } self } pub fn with_apc_candidates_dir>(mut self, path: P) -> Self { self.apc_candidates_dir_path = Some(path.as_ref().to_path_buf()); self } pub fn with_optimistic_precompiles(mut self, should_use_optimistic_precompiles: bool) -> Self { self.should_use_optimistic_precompiles = should_use_optimistic_precompiles; self } } #[derive(Debug, Clone)] pub enum InstructionKind { Normal, ConditionalBranch, UnconditionalBranch, } /// A configuration of a VM in which execution is happening. pub struct VmConfig<'a, M, B, C> { /// Maps an opcode to its AIR. pub instruction_handler: &'a M, /// The bus interaction handler, used by the constraint solver to reason about bus interactions. 
pub bus_interaction_handler: B, /// The bus map that maps bus id to bus type pub bus_map: BusMap, } // We implement Clone manually because deriving it adds a Clone bound to the `InstructionMachineHandler` impl<'a, M, B: Clone, C: Clone> Clone for VmConfig<'a, M, B, C> { fn clone(&self) -> Self { VmConfig { instruction_handler: self.instruction_handler, bus_interaction_handler: self.bus_interaction_handler.clone(), bus_map: self.bus_map.clone(), } } } pub trait InstructionHandler { type Field; type Instruction; type AirId; /// Returns the degree bound used for the instructions fn degree_bound(&self) -> DegreeBound; /// Returns the AIR for the given instruction. fn get_instruction_air_and_id( &self, instruction: &Self::Instruction, ) -> (Self::AirId, &SymbolicMachine); /// Returns the AIR stats for the given instruction. fn get_instruction_air_stats(&self, instruction: &Self::Instruction) -> AirStats; } #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Substitution { /// The index of the original column in the original air pub original_poly_index: usize, /// The `poly_id` of the target column in the APC air pub apc_poly_id: u64, } #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Apc { /// The block this APC is based on pub block: SuperBlock, /// The symbolic machine for this APC pub machine: SymbolicMachine, /// For each original instruction, the substitutions from original columns to APC columns pub subs: Vec>, /// The optimistic constraints to be satisfied for this apc to be run pub optimistic_constraints: OptimisticConstraints, } impl Apc { pub fn subs(&self) -> &[Vec] { &self.subs } pub fn machine(&self) -> &SymbolicMachine { &self.machine } /// The instructions in the block. pub fn instructions(&self) -> impl Iterator { self.block.instructions().map(|(_, i)| i) } /// The PCs of the original basic blocks composing this APC. Can be used to identify the APC. 
pub fn start_pcs(&self) -> Vec { self.block.start_pcs() } /// Create a new APC based on the given super block, symbolic machine and column allocator /// The column allocator only issues the subs which are actually used in the machine fn new( block: SuperBlock, machine: SymbolicMachine, optimistic_constraints: OptimisticConstraints, column_allocator: &ColumnAllocator, ) -> Self { // Get all poly_ids in the machine let all_references = machine .unique_references() .map(|r| r.id) .collect::>(); // Only keep substitutions from the column allocator if the target poly_id is used in the machine let subs = column_allocator .subs .iter() .map(|subs| { subs.iter() .enumerate() .filter_map(|(original_poly_index, apc_poly_id)| { all_references .contains(apc_poly_id) .then_some(Substitution { original_poly_index, apc_poly_id: *apc_poly_id, }) }) .collect_vec() }) .collect(); Self { block, machine, subs, optimistic_constraints, } } } /// Allocates global poly_ids and keeps track of substitutions pub struct ColumnAllocator { /// For each original air, for each original column index, the associated poly_id in the APC air subs: Vec>, /// The next poly_id to issue next_poly_id: u64, } impl ColumnAllocator { pub fn from_max_poly_id_of_machine(machine: &SymbolicMachine) -> Self { Self { subs: Vec::new(), next_poly_id: machine.main_columns().map(|c| c.id).max().unwrap_or(0) + 1, } } pub fn issue_next_poly_id(&mut self) -> u64 { let id = self.next_poly_id; self.next_poly_id += 1; id } /// Returns whether the given poly_id is known (i.e., was issued by this allocator) pub fn is_known_id(&self, poly_id: u64) -> bool { poly_id < self.next_poly_id } } pub fn build( block: SuperBlock, vm_config: AdapterVmConfig, degree_bound: DegreeBound, mut export_options: ExportOptions, empirical_constraints: &EmpiricalConstraints, ) -> Result, crate::constraint_optimizer::Error> { let start = std::time::Instant::now(); let (mut machine, column_allocator) = statements_to_symbolic_machine::( &block, 
vm_config.instruction_handler, &vm_config.bus_map, ); // Generate constraints for optimistic precompiles. let should_generate_execution_constraints = optimistic_precompile_config().restrict_optimistic_precompiles; let algebraic_references = BlockCellAlgebraicReferenceMapper::new(&column_allocator.subs, machine.main_columns()); let empirical_constraints = empirical_constraints.for_block(&block); // TODO: Use execution constraints let (empirical_constraints, _execution_constraints) = if should_generate_execution_constraints { // Filter constraints to only contain execution-checkable columns, // generate execution constraints for them. let optimistic_literals = optimistic_literals::(&block, &vm_config, °ree_bound); let empirical_constraints = empirical_constraints.filtered(|block_cell| { let algebraic_reference = algebraic_references .get_algebraic_reference(block_cell) .unwrap(); optimistic_literals.contains_key(algebraic_reference) }); let empirical_constraints = ConstraintGenerator::::new(empirical_constraints, algebraic_references, &block) .generate_constraints(); let execution_constraints = generate_execution_constraints(&empirical_constraints, &optimistic_literals); (empirical_constraints, execution_constraints) } else { // Don't filter empirical constraints, return empty execution constraints. 
let empirical_constraints = ConstraintGenerator::::new(empirical_constraints, algebraic_references, &block) .generate_constraints(); (empirical_constraints, vec![]) }; // Add empirical constraints to the baseline machine .constraints .extend(empirical_constraints.into_iter().map(Into::into)); if export_options.export_requested() { export_options.export_apc_from_machine::( block.clone(), machine.clone(), &column_allocator, &vm_config.bus_map, Some("unopt"), ); } let labels = [("apc_start_pc", block.start_pcs().into_iter().join("_"))]; metrics::counter!("before_opt_cols", &labels) .absolute(machine.unique_references().count() as u64); metrics::counter!("before_opt_constraints", &labels) .absolute(machine.unique_references().count() as u64); metrics::counter!("before_opt_interactions", &labels) .absolute(machine.unique_references().count() as u64); let (machine, column_allocator) = optimizer::optimize::<_, _, _, A::MemoryBusInteraction<_>>( machine, vm_config.bus_interaction_handler, degree_bound, &vm_config.bus_map, column_allocator, &mut export_options, )?; // add guards to constraints that are not satisfied by zeroes let (machine, column_allocator) = add_guards(machine, column_allocator); metrics::counter!("after_opt_cols", &labels) .absolute(machine.unique_references().count() as u64); metrics::counter!("after_opt_constraints", &labels) .absolute(machine.unique_references().count() as u64); metrics::counter!("after_opt_interactions", &labels) .absolute(machine.unique_references().count() as u64); // TODO: for now, we only include optimistic constraints related to superblock PCs. // Optimistic constraints from empirical constraints are still missing. 
let pc_constraints = superblock_pc_constraints::(&block); let optimistic_constraints = OptimisticConstraints::from_constraints(pc_constraints); let apc = Apc::new(block, machine, optimistic_constraints, &column_allocator); if export_options.export_requested() { export_options.export_apc::(&apc, None, &vm_config.bus_map); } let apc = convert_apc_field_type(apc, &A::into_field); metrics::gauge!("apc_gen_time_ms", &labels).set(start.elapsed().as_millis() as f64); Ok(apc) } /// Generate optimistic constraints for superblock jumps fn superblock_pc_constraints( block: &SuperBlock, ) -> Vec> { block .instruction_indexed_start_pcs() .into_iter() .map(|(instr_idx, pc)| { let left = OptimisticExpression::Literal(OptimisticLiteral { instr_idx, val: LocalOptimisticLiteral::Pc, }); let Ok(pc_value) = <::ExecutionState as ExecutionState>::Value::try_from(pc) else { panic!("PC doesn't fit in Value type"); }; let right = OptimisticExpression::Number(pc_value); OptimisticConstraint { left, right } }) .collect() } fn satisfies_zero_witness(expr: &AlgebraicExpression) -> bool { let mut zeroed_expr = expr.clone(); powdr::make_refs_zero(&mut zeroed_expr); let zeroed_expr = algebraic_to_grouped_expression(&zeroed_expr); zeroed_expr.try_to_number().unwrap().is_zero() } /// Adds `is_valid` guards to constraints without increasing its degree. /// This implementation always guards the LHS of multiplications. /// In the future this could be changed to minimize the number of guards added. /// Assumption: /// - `expr` is already simplified, i.e., expressions like (3 + 4) and (x * 1) do not appear. 
fn add_guards_constraint( expr: AlgebraicExpression, is_valid: &AlgebraicExpression, ) -> AlgebraicExpression { if satisfies_zero_witness(&expr) { return expr; } match expr { AlgebraicExpression::BinaryOperation(AlgebraicBinaryOperation { left, op, right }) => { let left = add_guards_constraint(*left, is_valid); let right = match op { AlgebraicBinaryOperator::Add | AlgebraicBinaryOperator::Sub => { Box::new(add_guards_constraint(*right, is_valid)) } AlgebraicBinaryOperator::Mul => right, }; AlgebraicExpression::new_binary(left, op, *right) } AlgebraicExpression::UnaryOperation(AlgebraicUnaryOperation { op, expr }) => { let inner = add_guards_constraint(*expr, is_valid); AlgebraicExpression::new_unary(op, inner) } AlgebraicExpression::Number(..) => expr * is_valid.clone(), _ => expr, } } /// Adds an `is_valid` guard to all constraints and bus interactions, if needed. fn add_guards( mut machine: SymbolicMachine, mut column_allocator: ColumnAllocator, ) -> (SymbolicMachine, ColumnAllocator) { let pre_degree = machine.degree(); let is_valid_ref = AlgebraicReference { name: Arc::new("is_valid".to_string()), id: column_allocator.issue_next_poly_id(), }; let is_valid = AlgebraicExpression::Reference(is_valid_ref.clone()); machine.derived_columns.push(DerivedVariable::new( is_valid_ref, ComputationMethod::Constant(T::one()), )); machine.constraints = machine .constraints .into_iter() .map(|c| add_guards_constraint(c.expr, &is_valid).into()) .collect(); let mut is_valid_mults: Vec> = Vec::new(); for b in &mut machine.bus_interactions { if !satisfies_zero_witness(&b.mult) { // guard the multiplicity by `is_valid` b.mult = is_valid.clone() * b.mult.clone(); // TODO this would not have to be cloned if we had *= //c.expr *= guard.clone(); } else { // if it's zero, then we do not have to change the multiplicity, but we need to force it to be zero on non-valid rows with a constraint let one = AlgebraicExpression::Number(1u64.into()); let e = ((one - is_valid.clone()) * 
b.mult.clone()).into(); is_valid_mults.push(e); } } machine.constraints.extend(is_valid_mults); // if pre_degree is 0, is_valid is added to the multiplicities of the bus interactions, thus the degree increases from 0 to 1 if pre_degree != 0 && !machine.bus_interactions.is_empty() { assert_eq!( pre_degree, machine.degree(), "Degree should not change after adding guards" ); } // This needs to be added after the assertion above because it's a quadratic constraint // so it may increase the degree of the machine. machine.constraints.push(powdr::make_bool(is_valid).into()); (machine, column_allocator) } ================================================ FILE: autoprecompiles/src/low_degree_bus_interaction_optimizer.rs ================================================ use itertools::Itertools; use powdr_constraint_solver::constraint_system::{ AlgebraicConstraint, BusInteraction, BusInteractionHandler, ConstraintSystem, }; use powdr_constraint_solver::grouped_expression::GroupedExpression; use powdr_constraint_solver::inliner::DegreeBound; use powdr_constraint_solver::range_constraint::RangeConstraint; use powdr_constraint_solver::runtime_constant::RuntimeConstant; use powdr_constraint_solver::solver::Solver; use powdr_number::FieldElement; use powdr_number::LargeInt; use std::fmt::Display; use std::hash::Hash; use std::marker::PhantomData; use crate::constraint_optimizer::IsBusStateful; use crate::range_constraint_optimizer::{RangeConstraintHandler, RangeConstraints}; /// An optimizer that replaces some stateless bus interactions (a.k.a. lookups) /// by low-degree algebraic constraints. 
pub struct LowDegreeBusInteractionOptimizer<'a, T, V, S, B> { solver: &'a mut S, bus_interaction_handler: B, degree_bound: DegreeBound, _phantom: PhantomData<(T, V)>, } struct LowDegreeReplacement { constraint: AlgebraicConstraint>, range_constraints: RangeConstraints, } impl< 'a, T: FieldElement, V: Ord + Clone + Ord + Eq + Display + Hash, S: Solver, B: BusInteractionHandler + IsBusStateful + RangeConstraintHandler, > LowDegreeBusInteractionOptimizer<'a, T, V, S, B> { pub fn new(solver: &'a mut S, bus_interaction_handler: B, degree_bound: DegreeBound) -> Self { Self { solver, bus_interaction_handler, degree_bound, _phantom: PhantomData, } } pub fn optimize(self, mut system: ConstraintSystem) -> ConstraintSystem { let mut new_constraints = vec![]; system.bus_interactions = system .bus_interactions .into_iter() .flat_map(|bus_int| { if let Some(LowDegreeReplacement { constraint: replacement, range_constraints, }) = self.try_replace_bus_interaction(&bus_int) { // If we found a replacement, add the polynomial constraints (unless it is // trivially zero) and replace the bus interaction with interactions implementing // the range constraints. // Note that many of these may be optimized away by the range constraint optimizer. if !replacement.is_redundant() { new_constraints.push(replacement); } self.bus_interaction_handler .batch_make_range_constraints(range_constraints) // It can be that the VM cannot implement the precise range constraint (although this // does not really happen in practice!). // For soundness, it is essential that the constraint is not wider than the // one we used to generate all inputs. So if `batch_make_range_constraints` // errors out, we keep the original bus interaction. // Note that we still add the polynomial constraints, because it'll likely // lead to columns being inlined. .unwrap_or(vec![bus_int]) } else { // Keep the bus interaction as is if a replacement can't be found. 
vec![bus_int] } }) .collect(); // Knowing the low-degree functions might help the solver. // The range constraints do not need to be added, because they don't carry information // that is not already implied by the existing bus interactions. self.solver .add_algebraic_constraints(new_constraints.iter().cloned()); system.algebraic_constraints.extend(new_constraints); system } /// Checks whether a bus interaction can be replaced by a low-degree constraint + range checks. /// Returns None if no replacement is found. fn try_replace_bus_interaction( &self, bus_interaction: &BusInteraction>, ) -> Option> { let bus_id = bus_interaction.bus_id.try_to_number()?; if self.bus_interaction_handler.is_stateful(bus_id) { return None; } self.symbolic_function_candidates_with_small_domain(bus_interaction) .into_iter() .find_map(|symbolic_function| { let low_degree_function = self.find_low_degree_function(bus_interaction, &symbolic_function)?; // Build polynomial constraint let symbolic_inputs = symbolic_function .inputs .iter() .cloned() .map(|input| input.expression) .collect(); let low_degree_function = low_degree_function(symbolic_inputs); let polynomial_constraint = AlgebraicConstraint::assert_eq( symbolic_function.output.expression, low_degree_function, ); // Check degree let within_degree_bound = polynomial_constraint.degree() <= self.degree_bound.identities; if within_degree_bound { let range_constraints = symbolic_function .inputs .into_iter() .map(|field| (field.expression, field.range_constraint)) .collect(); Some(LowDegreeReplacement { constraint: polynomial_constraint, range_constraints, }) } else { None } }) } /// Given a bus interaction of 2 or 3 unknown fields, finds all combinations of (symbolic) /// inputs and outputs where the input space is small enough. 
fn symbolic_function_candidates_with_small_domain( &self, bus_interaction: &BusInteraction>, ) -> Vec> { let unknown_fields = bus_interaction .payload .iter() .cloned() .enumerate() .filter(|(_i, expr)| expr.try_to_number().is_none()) .map(|(index, expression)| { let range_constraint = self.solver.range_constraint_for_expression(&expression); SymbolicField { index, expression, range_constraint, } }) .collect_vec(); let unknown_field_count = unknown_fields.len(); // Currently, we only have hypotheses for: // - 2 unknown fields (1 input, 1 output) // - 3 unknown fields (2 inputs, 1 output) if !(unknown_field_count == 2 || unknown_field_count == 3) { return Vec::new(); } unknown_fields .into_iter() .permutations(unknown_field_count) .map(|mut fields| { let output = fields.pop().unwrap(); SymbolicFunction { inputs: fields, output, } }) .filter(|function| { self.has_few_possible_values( function.inputs.iter().map(|f| f.range_constraint), MAX_DOMAIN_SIZE, ) }) .collect_vec() } /// Given a list of range constraints, computes whether space of all possible values /// is small enough. fn has_few_possible_values( &self, range_constraints: impl Iterator>, max_size: u64, ) -> bool { range_constraints .map(|rc| { // TODO: This should share code with `has_few_possible_assignments`, // But this only currently only considers the range width which ignores the mask // and might be way larger than the actual number of allowed values. rc.size_estimate().try_into_u64().and_then(|size| { if size < 1 << 16 { Some(rc.allowed_values().count() as u64) } else { None } }) }) .try_fold(1u64, |acc, x| acc.checked_mul(x?)) .is_some_and(|count| count <= max_size) } /// Given a bus interaction and a symbolic input-output pair, tries to find a low-degree function /// by testing all of the hard-coded hypotheses against set of all concrete input-output pairs. 
fn find_low_degree_function( &self, bus_interaction: &BusInteraction>, symbolic_function: &SymbolicFunction, ) -> Option> { let mut hypotheses = hypotheses(symbolic_function.inputs.len()); // Generate the function graph, to match against the hypotheses. let all_possible_assignments = self.concrete_input_output_pairs(bus_interaction, symbolic_function); for assignment in all_possible_assignments { let Ok((inputs, output)) = assignment else { // We can't enumerate all possible assignments, so the hypotheses can't be tested. return None; }; let inputs = inputs .into_iter() .map(|value| GroupedExpression::from_number(value)) .collect::>(); hypotheses.retain(|hypothesis| { let hypothesis_evaluation = hypothesis(inputs.clone()); hypothesis_evaluation.try_to_number().unwrap() == output }); if hypotheses.is_empty() { // No hypothesis left return None; } } // If we got this far, the hypothesis is correct! Some(hypotheses.into_iter().exactly_one().unwrap_or_else(|_| { panic!("Expected exactly one multilinear extension, but got multiple.") })) } /// Generate all concrete input-output pairs given a symbolic one. /// /// The inputs are generated as the cross product of all allowed values of the /// individual inputs. /// The outputs are generated by asking the bus interaction handler for each input assignment. /// /// If at any time (1) the inputs violate a constraint or (2) the outputs are not unique, /// an error is yielded. fn concrete_input_output_pairs<'b>( &'b self, bus_interaction: &BusInteraction>, input_output_pair: &'b SymbolicFunction, ) -> impl Iterator, T), ()>> + 'b { let bus_interaction = bus_interaction.to_range_constraints(self.solver); // Consider all possible input assignments, which is the cross product of all allowed values. 
let input_assignments = input_output_pair .inputs .iter() .map(move |input| { input .range_constraint .allowed_values() .map(|v| (input.index, v)) .collect_vec() }) .multi_cartesian_product(); // For each input assignment, try it and ask the bus interaction handler if there // is a unique output assignment. input_assignments.map(move |assignment| { // Set all inputs to concrete values let mut bus_interaction = bus_interaction.clone(); for (i, value) in assignment.iter() { bus_interaction.payload[*i] = RangeConstraint::from_value(*value); } let inputs = assignment.into_iter().map(|(_i, value)| value).collect(); // Get the output from the bus interaction handler, if it exists and is unique. let output = self .bus_interaction_handler .handle_bus_interaction_checked(bus_interaction) // If the assignment violates a constraint, return an error. .map_err(|_| ())? .payload[input_output_pair.output.index] .try_to_single_value() // If the output is not unique, return an error. .ok_or(())?; Ok((inputs, output)) }) } } /// Represents a low-degree function, mapping a list of inputs to a single output. type LowDegreeFunction = Box>) -> GroupedExpression>; /// The maximum size of the input domain for low-degree functions. const MAX_DOMAIN_SIZE: u64 = 256; /// Represents a bus interaction field. #[derive(Clone, Debug)] struct SymbolicField { /// The index into the bus interaction payload index: usize, /// The expression in the bus interaction payload expression: GroupedExpression, /// The range constraint for the expression range_constraint: RangeConstraint, } #[derive(Clone, Debug)] struct SymbolicFunction { inputs: Vec>, output: SymbolicField, } /// Some well-known low-degree functions that are tested against the input-output pairs. 
fn hypotheses( num_inputs: usize, ) -> Vec> { match num_inputs { 1 => vec![ // Identity function Box::new(|inputs| inputs[0].clone()), // Logical not (1 bit) Box::new(|inputs| GroupedExpression::from_number(T::from_u64(1)) - inputs[0].clone()), // Logical not (8 bit) Box::new(|inputs| { GroupedExpression::from_number(T::from_u64(0xff)) - inputs[0].clone() }), // Logical not (16 bit) Box::new(|inputs| { GroupedExpression::from_number(T::from_u64(0xffff)) - inputs[0].clone() }), ], 2 => vec![ // Identity on the first input. Note that we don't have to add identity on the second input, // because we test all possible permutations of inputs. Box::new(|inputs| inputs[0].clone()), // x + y Box::new(|inputs| inputs[0].clone() + inputs[1].clone()), // AND on bits: Box::new(|inputs| inputs[0].clone() * inputs[1].clone()), // OR on bits: Box::new(|inputs| { inputs[0].clone() + inputs[1].clone() - (inputs[0].clone() * inputs[1].clone()) }), // XOR on bits: Box::new(|inputs| { inputs[0].clone() + inputs[1].clone() - GroupedExpression::from_number(T::from_u64(2)) * (inputs[0].clone() * inputs[1].clone()) }), ], _ => panic!("Unexpected number of inputs: {num_inputs}"), } } #[cfg(test)] mod tests { use std::array::from_fn; use powdr_constraint_solver::solver::new_solver; use powdr_number::BabyBearField; use crate::range_constraint_optimizer::{MakeRangeConstraintsError, RangeConstraints}; use super::*; pub type Var = &'static str; pub fn var(name: Var) -> GroupedExpression { GroupedExpression::from_unknown_variable(name) } pub fn constant(value: u64) -> GroupedExpression { GroupedExpression::from_number(BabyBearField::from(value)) } #[derive(Clone, Debug)] struct XorBusHandler; impl BusInteractionHandler for XorBusHandler { fn handle_bus_interaction( &self, bus_interaction: BusInteraction>, ) -> BusInteraction> { let range_constraints = match ( bus_interaction.payload[0].try_to_single_value(), bus_interaction.payload[1].try_to_single_value(), ) { // If x and y are known, compute z 
(Some(x), Some(y)) => { let z = BabyBearField::from(x.to_degree() ^ y.to_degree()); [ RangeConstraint::from_value(x), RangeConstraint::from_value(y), RangeConstraint::from_value(z), ] } // By default, just return byte range constraints _ => from_fn(|_i| RangeConstraint::from_mask(0xffu32)), }; BusInteraction { bus_id: bus_interaction.bus_id, payload: range_constraints.into_iter().collect(), multiplicity: bus_interaction.multiplicity, } } } impl IsBusStateful for XorBusHandler { fn is_stateful(&self, _bus_id: BabyBearField) -> bool { false } } impl RangeConstraintHandler for XorBusHandler { fn pure_range_constraints( &self, _bus_interaction: &BusInteraction>, ) -> Option> { unreachable!() } fn batch_make_range_constraints( &self, _range_constraints: RangeConstraints, ) -> Result< Vec>>, MakeRangeConstraintsError, > { unreachable!() } } fn compute_replacement( mut solver: impl Solver, bus_interaction: &BusInteraction>, ) -> Option>> { let optimizer = LowDegreeBusInteractionOptimizer { solver: &mut solver, bus_interaction_handler: XorBusHandler, degree_bound: DegreeBound { identities: 2, bus_interactions: 1, }, _phantom: PhantomData, }; optimizer .try_replace_bus_interaction(bus_interaction) .map(|v| v.constraint) } #[test] fn test_try_replace_bus_interaction_generic_xor() { let mut solver = new_solver(ConstraintSystem::default(), XorBusHandler); // The input search space is small, but xor is not linear. solver.add_range_constraint(&"x", RangeConstraint::from_mask(0xfu32)); solver.add_range_constraint(&"y", RangeConstraint::from_mask(0xfu32)); let bus_interaction = BusInteraction { bus_id: constant(0), payload: vec![var("x"), var("y"), var("z")], multiplicity: constant(1), }; let replacement = compute_replacement(solver, &bus_interaction); assert!(replacement.is_none()); } #[test] fn test_try_replace_bus_interaction_logical_not() { let mut solver = new_solver(ConstraintSystem::default(), XorBusHandler); // not(x) is a linear function (255 - x). 
solver.add_range_constraint(&"x", RangeConstraint::from_mask(0xffu32)); let bus_interaction = BusInteraction { bus_id: constant(0), payload: vec![var("x"), constant(0xff), var("z")], multiplicity: constant(1), }; let Some(replacement) = compute_replacement(solver, &bus_interaction) else { panic!("Expected a replacement") }; assert_eq!(replacement.to_string(), "x + z - 255 = 0"); } #[test] fn test_try_replace_bus_interaction_binary_inputs() { let mut solver = new_solver(ConstraintSystem::default(), XorBusHandler); // Any function on two bits has a multilinear extension. solver.add_range_constraint(&"x", RangeConstraint::from_mask(1u32)); solver.add_range_constraint(&"y", RangeConstraint::from_mask(1u32)); let bus_interaction = BusInteraction { bus_id: constant(0), payload: vec![var("x"), var("y"), var("z")], multiplicity: constant(1), }; let Some(replacement) = compute_replacement(solver, &bus_interaction) else { panic!("Expected a replacement") }; assert_eq!(replacement.to_string(), "(2 * x) * (y) - x - y + z = 0"); } #[test] fn test_try_replace_bus_interaction_disjoint_masks() { let mut solver = new_solver(ConstraintSystem::default(), XorBusHandler); // Because the masks are disjoint, there is a multilinear extension: z = x + y. 
solver.add_range_constraint(&"x", RangeConstraint::from_mask(0x0fu32)); solver.add_range_constraint(&"y", RangeConstraint::from_mask(0xf0u32)); let bus_interaction = BusInteraction { bus_id: constant(0), payload: vec![var("x"), var("y"), var("z")], multiplicity: constant(1), }; let Some(replacement) = compute_replacement(solver, &bus_interaction) else { panic!("Expected a replacement") }; assert_eq!(replacement.to_string(), "-(x + y - z) = 0"); } } ================================================ FILE: autoprecompiles/src/memory_optimizer.rs ================================================ use std::collections::{HashMap, HashSet}; use std::fmt::Display; use std::hash::Hash; use itertools::Itertools; use powdr_constraint_solver::constraint_system::{ AlgebraicConstraint, BusInteraction, ConstraintSystem, }; use powdr_constraint_solver::grouped_expression::GroupedExpression; use powdr_constraint_solver::solver::Solver; use powdr_number::FieldElement; /// Optimizes bus sends that correspond to general-purpose memory read and write operations. /// It works best if all read-write-operation addresses are fixed offsets relative to some /// symbolic base address. If stack and heap access operations are mixed, this is usually violated. pub fn optimize_memory< T: FieldElement, V: Hash + Eq + Clone + Ord + Display, M: MemoryBusInteraction, >( mut system: ConstraintSystem, solver: &mut impl Solver, memory_bus_id: Option, ) -> ConstraintSystem { // In the absence of memory bus, we return the system unchanged let memory_bus_id = match memory_bus_id { Some(id) => id, None => { return system; } }; // TODO use the solver here. 
// NOTE(review): generic argument lists (e.g. the turbofish below and the
// `IntoIterator` bound further down) appear to have been lost in extraction
// (`<...>` spans stripped) — restore them from version control before building.
let (to_remove, new_constraints) =
    redundant_memory_interactions_indices::(&system, solver, memory_bus_id);
    // Drop the bus interactions whose indices were marked redundant, keeping
    // the relative order of the remaining ones.
    let to_remove = to_remove.into_iter().collect::>();
    system.bus_interactions = system
        .bus_interactions
        .into_iter()
        .enumerate()
        .filter_map(|(i, bus)| (!to_remove.contains(&i)).then_some(bus))
        .collect();
    // Teach the solver the equality constraints that replace the removed
    // memory interactions, then also add them to the system itself.
    solver.add_algebraic_constraints(new_constraints.iter().cloned());
    // TODO perform substitutions instead
    system.algebraic_constraints.extend(new_constraints);
    system
}

#[derive(Debug, Copy, Clone)]
/// The type of the memory bus interaction.
pub enum MemoryOp {
    /// Get the previous value from memory.
    GetPrevious,
    /// Set the new value in memory.
    SetNew,
}

/// A recoverable error when trying to convert a bus interaction to a memory bus interaction.
/// For example, it might be that we don't know the bus ID or multiplicity yet.
pub struct MemoryBusInteractionConversionError;

/// A bus interaction that corresponds to half of a memory operation,
/// i.e. either a "get previous" or a "set new" operation.
/// Note that the order of memory bus interactions as they appear in the constraint system
/// is assumed to be chronological.
pub trait MemoryBusInteraction: Sized {
    /// The address type of the memory bus interaction.
    /// We assume that it can be represented as a list of expressions of a *static* size, i.e.,
    /// `addr.into_iter().count()` should always return the same value.
    /// If there are different memories (e.g. register memory and heap memory), this type can be
    /// a composite address.
    type Address: IntoIterator>;

    /// Tries to convert a `BusInteraction` to a `MemoryBusInteraction`.
    ///
    /// Returns `Ok(None)` if we know that the bus interaction is not a memory bus interaction.
    /// Returns `Err(_)` if the bus interaction is a memory bus interaction but could not be converted properly
    /// (usually because the multiplicity is not -1 or 1).
/// Otherwise returns `Ok(Some(memory_bus_interaction))` fn try_from_bus_interaction( bus_interaction: &BusInteraction>, memory_bus_id: u64, ) -> Result, MemoryBusInteractionConversionError>; /// Returns the address of the memory bus interaction. fn addr(&self) -> Self::Address; /// Returns the data part of the memory bus interaction. fn data(&self) -> &[GroupedExpression]; /// Returns the timestamp part of the memory bus interaction. fn timestamp_limbs(&self) -> &[GroupedExpression]; /// Returns the operation of the memory bus interaction. fn op(&self) -> MemoryOp; } #[derive(Clone, Debug, Eq, PartialEq, Hash)] /// A memory address, represented as a list of expressions. /// By converting from `MemoryBusInteraction::Address` to `Address`, /// we can make sure that its `Eq` implementation is the expected one: Two addresses /// are equal if all their parts are equal. struct Address(Vec>); impl From for Address where I: IntoIterator>, { fn from(exprs: I) -> Self { Self(exprs.into_iter().collect()) } } struct MemoryContent { bus_index: usize, data: Vec>, timestamp_limbs: Vec>, } impl MemoryContent { fn from_bus_interaction>(bus_index: usize, mem_int: M) -> Self { Self { bus_index, data: mem_int.data().to_vec(), timestamp_limbs: mem_int.timestamp_limbs().to_vec(), } } } /// Tries to find indices of bus interactions that can be removed in the given machine /// and also returns a set of new constraints to be added. fn redundant_memory_interactions_indices< T: FieldElement, V: Ord + Clone + Hash + Display, M: MemoryBusInteraction, >( system: &ConstraintSystem, solver: &mut impl Solver, memory_bus_id: u64, ) -> ( Vec, Vec>>, ) { let mut new_constraints = Vec::new(); // Track memory contents by memory type while we go through bus interactions. // This maps an address to the index of the previous send on that address, the // data currently stored there and the timestamp used in the last send. 
let mut memory_contents: HashMap, MemoryContent> = Default::default(); let mut to_remove: Vec = Default::default(); // TODO we assume that memory interactions are sorted by timestamp. for (index, bus_int) in system.bus_interactions.iter().enumerate() { let mem_int = match M::try_from_bus_interaction(bus_int, memory_bus_id) { Ok(Some(mem_int)) => mem_int, Ok(None) => continue, Err(_) => { // This interaction might be going to memory, but we do not know // the multiplicity. Delete all knowledge. // TODO If we can still clearly determine the memory type, we could // only clear the knowledge for that memory type. memory_contents.clear(); continue; } }; let addr = mem_int.addr().into(); match mem_int.op() { MemoryOp::GetPrevious => { // If there is an unconsumed send to this address, consume it. // In that case, we can replace both bus interactions with equality constraints // between the data that would have been sent and received. if let Some(existing) = memory_contents.remove(&addr) { for (existing, new) in existing.data.iter().zip_eq(mem_int.data().iter()) { new_constraints.push(AlgebraicConstraint::assert_zero( existing.clone() - new.clone(), )); } for (existing_timestamp_limb, new_timestamp_limb) in existing .timestamp_limbs .iter() .zip_eq(mem_int.timestamp_limbs().iter()) { new_constraints.push(AlgebraicConstraint::assert_zero( existing_timestamp_limb.clone() - new_timestamp_limb.clone(), )); } to_remove.extend([index, existing.bus_index]); } } MemoryOp::SetNew => { // We can only retain knowledge about addresses where we can prove // that this send operation does not interfere with it, i.e. // if we can prove that the two addresses differ by at least a word size. memory_contents.retain(|other_addr, _| { addr.0 .iter() .zip_eq(other_addr.0.iter()) // Two addresses are different if they differ in at least one component. 
.any(|(a, b)| solver.are_expressions_known_to_be_different(a, b)) }); memory_contents.insert( addr.clone(), MemoryContent::from_bus_interaction(index, mem_int), ); } } } log::debug!( "Removing {} memory interactions and adding {} new constraints", to_remove.len(), new_constraints.len() ); (to_remove, new_constraints) } ================================================ FILE: autoprecompiles/src/optimistic/algebraic_references.rs ================================================ use std::collections::BTreeMap; use crate::{empirical_constraints::BlockCell, expression::AlgebraicReference}; /// Maps BlockCells to their corresponding AlgebraicReferences. pub struct BlockCellAlgebraicReferenceMapper { block_cell_to_algebraic_reference: BTreeMap, } impl BlockCellAlgebraicReferenceMapper { /// Creates a new BlockCellAlgebraicReferenceMapper. /// Arguments: /// - `subs`: A mapping from instruction index and column index to polynomial IDs. /// This would typically come from a `ColumnAllocator`. /// - `columns`: An iterator over the algebraic references for the columns in the block. 
// NOTE(review): generic/`Item` parameter lists in the signatures and turbofishes
// below appear to have been lost in extraction — restore from version control.
pub fn new(subs: &[Vec], columns: impl Iterator) -> Self {
    // Invert `subs`: map each polynomial ID to the block cell
    // (instruction index, column index) it was allocated for.
    let poly_id_to_block_cell = subs
        .iter()
        .enumerate()
        .flat_map(|(instr_index, subs)| {
            subs.iter().enumerate().map(move |(col_index, &poly_id)| {
                (poly_id, BlockCell::new(instr_index, col_index))
            })
        })
        .collect::>();
    // Re-key the provided column references by their block cell.
    // NOTE(review): the `unwrap` panics if a column's ID does not occur in
    // `subs` — presumably all columns come from the same `ColumnAllocator`;
    // confirm with callers.
    let block_cell_to_algebraic_reference = columns
        .map(|r| (*poly_id_to_block_cell.get(&r.id).unwrap(), r))
        .collect::>();
    Self {
        block_cell_to_algebraic_reference,
    }
}

/// Returns the algebraic reference for the given block cell, if one is known.
pub fn get_algebraic_reference(&self, block_cell: &BlockCell) -> Option<&AlgebraicReference> {
    self.block_cell_to_algebraic_reference.get(block_cell)
}

/// Returns true if an algebraic reference is known for the given block cell.
pub fn has_block_cell(&self, block_cell: &BlockCell) -> bool {
    self.block_cell_to_algebraic_reference
        .contains_key(block_cell)
}
}

================================================ FILE: autoprecompiles/src/optimistic/config.rs ================================================

const DEFAULT_EXECUTION_COUNT_THRESHOLD: u64 = 100;
const DEFAULT_MAX_SEGMENTS: usize = 20;

pub struct OptimisticPrecompileConfig {
    /// For any program line that was not executed at least this many times in the traces,
    /// discard any empirical constraints associated with it.
    pub execution_count_threshold: u64,
    /// The maximum number of segments to keep in memory while detecting empirical constraints.
    /// A higher number here leads to more accurate percentile estimates, but uses more memory.
    pub max_segments: usize,
    /// Whether to restrict empirical constraints to those that are checkable at execution time.
pub restrict_optimistic_precompiles: bool, } pub fn optimistic_precompile_config() -> OptimisticPrecompileConfig { let execution_count_threshold = std::env::var("POWDR_OP_EXECUTION_COUNT_THRESHOLD") .ok() .and_then(|s| s.parse().ok()) .unwrap_or(DEFAULT_EXECUTION_COUNT_THRESHOLD); let max_segments = std::env::var("POWDR_EMPIRICAL_CONSTRAINTS_MAX_SEGMENTS") .ok() .and_then(|s| s.parse().ok()) .unwrap_or(DEFAULT_MAX_SEGMENTS); let restricted_optimistic_precompiles = std::env::var("POWDR_RESTRICTED_OPTIMISTIC_PRECOMPILES") == Ok("1".to_string()); OptimisticPrecompileConfig { execution_count_threshold, max_segments, restrict_optimistic_precompiles: restricted_optimistic_precompiles, } } ================================================ FILE: autoprecompiles/src/optimistic/execution_constraint_generator.rs ================================================ use std::collections::BTreeMap; use powdr_number::{FieldElement, LargeInt}; use crate::{ empirical_constraints::{EqualityConstraint, EqualityExpression}, execution::{OptimisticConstraint, OptimisticExpression, OptimisticLiteral}, expression::AlgebraicReference, }; /// Converts a list of equality constraints into optimistic execution constraints. /// Only works for constraints between numbers and algebraic references that have /// corresponding optimistic literal, otherwise panics. 
pub fn generate_execution_constraints( equality_constraints: &[EqualityConstraint], optimistic_literals: &BTreeMap>>, ) -> Vec, u32>> { equality_constraints .iter() .map(|constraint| OptimisticConstraint { left: get_optimistic_expression(optimistic_literals, &constraint.left), right: get_optimistic_expression(optimistic_literals, &constraint.right), }) .collect() } fn get_optimistic_expression( optimistic_literals: &BTreeMap>>, algebraic_expression: &EqualityExpression, ) -> OptimisticExpression, u32> { match algebraic_expression { EqualityExpression::Number(n) => { OptimisticExpression::Number(n.to_integer().try_into_u32().unwrap()) } EqualityExpression::Reference(r) => { let optimistic_literal = optimistic_literals.get(r).unwrap(); OptimisticExpression::Literal(optimistic_literal.clone()) } } } ================================================ FILE: autoprecompiles/src/optimistic/execution_literals.rs ================================================ use std::collections::BTreeMap; use crate::export::ExportOptions; use crate::memory_optimizer::MemoryBusInteraction; use crate::symbolic_machine::{ symbolic_bus_interaction_to_bus_interaction, SymbolicBusInteraction, }; use crate::symbolic_machine_generator::statements_to_symbolic_machines; use crate::{ adapter::{Adapter, AdapterVmConfig}, blocks::SuperBlock, bus_map::BusType, execution::{LocalOptimisticLiteral, OptimisticLiteral}, expression::AlgebraicReference, memory_optimizer::MemoryOp, optimizer::optimize, }; use crate::{ColumnAllocator, SymbolicMachine}; use powdr_constraint_solver::inliner::DegreeBound; /// Maps an algebraic reference to an execution literal, if it represents the limb of a /// memory access to an address known at compile time. pub fn optimistic_literals( block: &SuperBlock, vm_config: &AdapterVmConfig, degree_bound: &DegreeBound, ) -> BTreeMap::PowdrField>>> { // 1. 
Generate symbolic machines for each instruction in the block let (symbolic_machines, column_allocator) = statements_to_symbolic_machines::( block, vm_config.instruction_handler, &vm_config.bus_map, ); symbolic_machines .into_iter() .enumerate() // 2. Extract memory accesses with known addresses .flat_map(|(instruction_index, symbolic_machine)| { extract_concrete_memory_accesses::( symbolic_machine, instruction_index, vm_config, degree_bound, ) }) // 3. Map each limb reference to an optimistic literal .flat_map(|memory_access| generate_limb_mapping(memory_access, &column_allocator)) .collect() } /// A memory access going to a concrete (= compile-time) address. struct ConcreteMemoryAccess { instruction_index: usize, concrete_address: Vec, limbs: Vec, } /// Given a symbolic machine, extracts all the concrete memory accesses /// This works by: /// - optimizing the symbolic machine to resolve as many addresses as possible /// - filtering for memory bus interactions with known addresses /// - extracting the concrete address and the references to the data limbs fn extract_concrete_memory_accesses( symbolic_machine: SymbolicMachine, instruction_index: usize, vm_config: &AdapterVmConfig, degree_bound: &DegreeBound, ) -> impl Iterator> { // Optimize the dummy block, so that register addresses become known at compile time. // It is important that this happens per instruction, because otherwise the memory // optimizer might remove intermediate register accesses, meaning that we'd miss // those optimistic literals. // Note that the optimizer would still remove some memory accesses, if the instruction // accesses the same register multiple times. 
let dummy_column_allocator = ColumnAllocator::from_max_poly_id_of_machine(&symbolic_machine); let (symbolic_machine, _) = optimize::<_, _, _, A::MemoryBusInteraction<_>>( symbolic_machine.clone(), vm_config.bus_interaction_handler.clone(), *degree_bound, &vm_config.bus_map, // The optimizer might introduce new columns, but we'll discard later. dummy_column_allocator, &mut ExportOptions::default(), ) .unwrap(); let memory_bus_id = vm_config.bus_map.get_bus_id(&BusType::Memory).unwrap(); symbolic_machine .bus_interactions .into_iter() // Filter for memory bus interactions .filter_map(move |bus_interaction| { try_extract_concrete_memory_access::( instruction_index, bus_interaction, memory_bus_id, ) }) } /// Given a bus interaction, tries to instantiate a ConcreteMemoryAccess. /// This will work if the bus interaction is a memory bus interaction with a known multiplicity, /// the address is known concretely, and value references are single columns. fn try_extract_concrete_memory_access( instruction_index: usize, bus_interaction: SymbolicBusInteraction, memory_bus_id: u64, ) -> Option> { let bus_interaction = symbolic_bus_interaction_to_bus_interaction(&bus_interaction); let bus_interaction = A::MemoryBusInteraction::try_from_bus_interaction(&bus_interaction, memory_bus_id) // TODO: This filters out memory bus interactions with unknown multiplicity. 
.ok() .flatten()?; let address = bus_interaction.addr(); let data = bus_interaction.data(); // Find concrete address let concrete_address = address .into_iter() .map(|expr| expr.try_to_known().cloned()) .collect::>>()?; // Find references to the limbs let limbs = data .iter() .map(|expr| expr.try_to_simple_unknown()) .collect::>>()?; let instruction_index = match bus_interaction.op() { MemoryOp::GetPrevious => instruction_index, MemoryOp::SetNew => instruction_index + 1, }; Some(ConcreteMemoryAccess { instruction_index, concrete_address, limbs, }) } /// Given a concrete memory access, generates a mapping from each limb's reference /// to an optimistic literal representing that limb. /// Skips limbs that refer to columns introduced by the optimizer. fn generate_limb_mapping<'a, T: Clone + 'a>( memory_access: ConcreteMemoryAccess, column_allocator: &'a ColumnAllocator, ) -> impl Iterator>)> + 'a { memory_access .limbs .into_iter() .enumerate() .filter_map(move |(limb_index, limb_ref)| { if !column_allocator.is_known_id(limb_ref.id) { // Limb refers to a column introduced by the optimizer, skip it. // We would never have empirical constraints on such a column anyway. 
return None; } let local_literal = LocalOptimisticLiteral::RegisterLimb( memory_access.concrete_address.clone(), limb_index, ); let optimistic_literal = OptimisticLiteral { instr_idx: memory_access.instruction_index, val: local_literal, }; Some((limb_ref, optimistic_literal)) }) } ================================================ FILE: autoprecompiles/src/optimistic/mod.rs ================================================ pub mod algebraic_references; pub mod config; pub mod execution_constraint_generator; pub mod execution_literals; ================================================ FILE: autoprecompiles/src/optimizer.rs ================================================ use std::fmt::Debug; use std::fmt::Display; use std::hash::Hash; use itertools::Itertools; use powdr_constraint_solver::constraint_system::BusInteractionHandler; use powdr_constraint_solver::grouped_expression::GroupedExpression; use powdr_constraint_solver::indexed_constraint_system::IndexedConstraintSystem; use powdr_constraint_solver::inliner::{self, inline_everything_below_degree_bound}; use powdr_constraint_solver::rule_based_optimizer::rule_based_optimization; use powdr_constraint_solver::solver::new_solver; use powdr_number::FieldElement; use crate::constraint_optimizer; use crate::constraint_optimizer::{trivial_simplifications, IsBusStateful}; use crate::export::ExportOptions; use crate::memory_optimizer::MemoryBusInteraction; use crate::range_constraint_optimizer::{optimize_range_constraints, RangeConstraintHandler}; use crate::symbolic_machine::{ constraint_system_to_symbolic_machine, symbolic_machine_to_constraint_system, SymbolicConstraint, }; use crate::ColumnAllocator; use crate::{ constraint_optimizer::optimize_constraints, expression::AlgebraicReference, stats_logger::{self, StatsLogger}, BusMap, BusType, DegreeBound, SymbolicMachine, }; /// Optimizes a given symbolic machine and returns an equivalent, but "simpler" one. 
/// Runs the full optimization pipeline on a symbolic machine.
///
/// The pipeline: collapses the execution bus, converts to an indexed
/// constraint system, runs `optimize_constraints` in a loop until the
/// system statistics reach a fixed point, then inlines constrained
/// witness columns, removes disconnected columns, applies rule-based
/// and range-constraint optimization, and finishes with trivial
/// simplifications and sanity checks.
///
/// All constraints in the returned machine will respect the given degree bound.
/// New variables may be introduced in the process.
///
/// Returns the optimized machine together with the column allocator
/// (which may have issued fresh poly IDs), or the constraint
/// optimizer's error.
pub fn optimize(
    mut machine: SymbolicMachine,
    bus_interaction_handler: B,
    degree_bound: DegreeBound,
    bus_map: &BusMap,
    mut column_allocator: ColumnAllocator,
    export_options: &mut ExportOptions,
) -> Result<(SymbolicMachine, ColumnAllocator), crate::constraint_optimizer::Error>
where
    T: FieldElement,
    B: BusInteractionHandler + IsBusStateful + RangeConstraintHandler + Clone,
    BusTypes: PartialEq + Eq + Clone + Display,
    MemoryBus: MemoryBusInteraction,
{
    let mut stats_logger = StatsLogger::start(&machine);
    // If the machine uses an execution bridge, collapse the chain of
    // execution-bus interactions first (see `optimize_exec_bus`).
    if let Some(exec_bus_id) = bus_map.get_bus_id(&BusType::ExecutionBridge) {
        machine = optimize_exec_bus(machine, exec_bus_id);
        stats_logger.log("exec bus optimization", &machine);
    }
    export_options.export_optimizer_outer(&machine, "exec_bus");
    // Factory for fresh witness columns: issues a new poly ID and derives
    // the column name from it.
    let mut new_var = |name: &str| {
        let id = column_allocator.issue_next_poly_id();
        AlgebraicReference {
            // TODO is it a problem that we do not check the name to be unique?
            name: format!("{name}_{id}").into(),
            id,
        }
    };
    let constraint_system = symbolic_machine_to_constraint_system(machine);
    stats_logger.log("system construction", &constraint_system);
    let mut constraint_system: IndexedConstraintSystem<_, _> = constraint_system.into();
    stats_logger.log("indexing", &constraint_system);
    // We could run the rule system before ever constructing the solver.
    // Currently, it does not yet save time.
    // let mut constraint_system = rule_based_optimization(
    //     constraint_system,
    //     NoRangeConstraints,
    //     bus_interaction_handler.clone(),
    //     &mut new_var,
    //     // No degree bound given, i.e. only perform replacements that
    //     // do not increase the degree.
    //     None,
    // )
    // .0;
    // export_options.register_substituted_variables(assignments);
    // export_options.export_optimizer_outer(&machine, "02_rule_based_optimization");
    stats_logger.log("rule-based optimization", &constraint_system);
    let mut solver = new_solver(
        constraint_system.system().clone(),
        bus_interaction_handler.clone(),
    );
    stats_logger.log("constructing the solver", &constraint_system);
    // Main optimization loop: re-run the constraint passes until the
    // system statistics stop changing (fixed point).
    loop {
        export_options
            .export_optimizer_outer_constraint_system(constraint_system.system(), "loop_iteration");
        let stats = stats_logger::Stats::from(&constraint_system);
        constraint_system = optimize_constraints::<_, _, MemoryBus>(
            constraint_system,
            &mut solver,
            bus_interaction_handler.clone(),
            &mut stats_logger,
            bus_map.get_bus_id(&BusType::Memory),
            degree_bound,
            &mut new_var,
            export_options,
        )?
        .into();
        if stats == stats_logger::Stats::from(&constraint_system) {
            break;
        }
    }
    // Inline witness columns whose defining constraints stay below the
    // degree bound after substitution.
    let (constraint_system, substitutions) = inliner::replace_constrained_witness_columns(
        constraint_system,
        inline_everything_below_degree_bound(degree_bound),
    );
    stats_logger.log("inlining", &constraint_system);
    export_options.register_substituted_variables(substitutions);
    export_options.export_optimizer_outer_constraint_system(constraint_system.system(), "inlining");
    let constraint_system = constraint_optimizer::remove_disconnected_columns(
        constraint_system,
        &mut solver,
        bus_interaction_handler.clone(),
    );
    stats_logger.log("removing disconnected columns", &constraint_system);
    export_options.export_optimizer_inner_constraint_system(
        constraint_system.system(),
        "remove_disconnected",
    );
    let (constraint_system, _) = rule_based_optimization(
        constraint_system,
        &solver,
        bus_interaction_handler.clone(),
        &mut new_var,
        Some(degree_bound),
    );
    export_options
        .export_optimizer_outer_constraint_system(constraint_system.system(), "rule_based");
    // Note that the rest of the optimization does not benefit from optimizing range constraints,
    // so we only do it once at the end.
    let constraint_system = optimize_range_constraints(
        constraint_system.into(),
        bus_interaction_handler.clone(),
        degree_bound,
    );
    stats_logger.log("optimizing range constraints", &constraint_system);
    export_options
        .export_optimizer_outer_constraint_system(&constraint_system, "range_constraints");
    let constraint_system = trivial_simplifications(
        constraint_system.into(),
        bus_interaction_handler,
        &mut stats_logger,
    )
    .system()
    .clone();
    export_options.export_optimizer_outer_constraint_system(&constraint_system, "trivial_simp");
    stats_logger.finalize(&constraint_system);
    export_options.export_substituted_variables();
    // Sanity check: Degree bound should be respected:
    for algebraic_constraint in &constraint_system.algebraic_constraints {
        assert!(
            algebraic_constraint.degree() <= degree_bound.identities,
            "Degree bound violated ({} > {}): {algebraic_constraint}",
            algebraic_constraint.degree(),
            degree_bound.identities
        );
    }
    for bus_interaction in &constraint_system.bus_interactions {
        for (i, expr) in bus_interaction.fields().enumerate() {
            assert!(
                expr.degree() <= degree_bound.identities,
                "Degree bound violated in field {i} ({} > {}): {bus_interaction}",
                expr.degree(),
                degree_bound.identities
            );
        }
    }
    // Sanity check: All PC lookups should be removed, because we'd only have constants on the LHS.
    let pc_lookup_bus_id = bus_map.get_bus_id(&BusType::PcLookup).unwrap();
    assert!(
        !constraint_system
            .bus_interactions
            .iter()
            .any(|b| b.bus_id == GroupedExpression::from_number(T::from(pc_lookup_bus_id))),
        "Expected all PC lookups to be removed."
    );
    Ok((
        constraint_system_to_symbolic_machine(constraint_system),
        column_allocator,
    ))
}

/// Collapses the chain of execution-bus interactions of the machine.
///
/// Only the first receive and the last send on the execution bus are
/// kept. Every intermediate send/receive pair is removed and replaced by
/// algebraic constraints equating the arguments of the removed send with
/// the arguments of the following receive (pairwise, via `zip_eq`).
///
/// The code assumes the execution-bus interactions strictly alternate
/// receive/send, starting with a receive, and that at least one send is
/// present — the final `unwrap` panics otherwise.
/// NOTE(review): confirm that all callers guarantee this shape.
pub fn optimize_exec_bus(
    mut machine: SymbolicMachine,
    exec_bus_id: u64,
) -> SymbolicMachine {
    let mut first_seen = false;
    // Toggled on every exec-bus interaction; `true` means the current
    // interaction is expected to be a receive.
    let mut receive = true;
    let mut latest_send = None;
    let mut execution_bus_constraints = vec![];
    machine.bus_interactions.retain(|bus_int| {
        // Interactions on other buses are left untouched.
        if bus_int.id != exec_bus_id {
            return true;
        }
        if receive {
            // TODO assert that mult matches -expr
        }
        // Keep the first receive
        let keep = if !first_seen {
            first_seen = true;
            true
        } else if !receive {
            // Save the latest send and remove the bus interaction
            latest_send = Some(bus_int.clone());
            false
        } else {
            // Equate the latest send to the new receive and remove the bus interaction
            for (bus_arg, send_arg) in bus_int
                .args
                .iter()
                .zip_eq(latest_send.as_ref().unwrap().args.iter())
            {
                execution_bus_constraints
                    .push(SymbolicConstraint::from(bus_arg.clone() - send_arg.clone()))
            }
            false
        };
        receive = !receive;
        keep
    });
    // Re-add the last send
    machine.bus_interactions.push(latest_send.unwrap());
    // Add the constraints which replace the execution bus interactions
    machine.constraints.extend(execution_bus_constraints);
    machine
}

/// A wrapped variable: Either a regular variable or a bus interaction field.
#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum Variable {
    /// A regular variable of the underlying constraint system.
    Variable(V),
    /// A field of a bus interaction, identified by
    /// (bus interaction index, field index).
    BusInteractionField(usize, usize),
}

impl Display for Variable {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Variable::Variable(v) => write!(f, "{v}"),
            Variable::BusInteractionField(bus_index, field_index) => {
                write!(f, "BusInteractionField({bus_index}, {field_index})")
            }
        }
    }
}

================================================
FILE: autoprecompiles/src/optimizer_documentation.md
================================================
# The Autoprecompiles Optimizer

## Terminology

### Field Elements

Throughout this document, we will be working in a finite field of prime order `p`.
Often, we use operators or concepts that are only defined in the integers. In this case, we use the natural number representation of the field element, i.e. the unique integer `x` such that `0 <= x < p` where the field operations are defined as `x + y = (x + y) mod p` and `x * y = (x * y) mod p` for field elements `x` and `y`. This way, we can also make statements about a field element being _less than_ another field element, even if this would not make sense inside the finite field. Sometimes, field elements are also interpreted as signed integers instead of natural numbers, but this will be clarified. ### Constraint System The optimizer is operating on an abstraction of a chip we call _Constraint System_, which consists of a set of _Algebraic Constraints_ and _Bus Interactions_. Both of them contain expressions involving variables. A Constraint System is _satisfied_ by an assignment of its variables if the assignment satisfies all Algebraic Constraints and Bus Interactions in the system. The purpose of the optimizer is to simplify a Constraint System into a Constraint System that has the same satisfying assignments. This is not the exact definition of correctness for the optimizer because it is also allowed to remove variables and introduce new ones, but it is a good guideline for now until we have all the definitions. ### Algebraic Constraint An _Algebraic Constraint_ consists of an _Algebraic Expression_, i.e. an expression involving the operators `+` and `*` on _Variables_ and _Constants_ from the implied finite field. The idea is that the constraint forces the expression to be zero and thus we write it as an equation ` = 0` (but also ` = ` if more convenient, by which we mean ` - = 0`). An Algebraic Constraint is _satisfied_ by an assignment of the variables if it evaluates to zero under this assignment. 
Example: `x * (x - 1) = 0` is an algebraic constraint that forces the variable `x` to be either zero or one, meaning that an assignment satisfies this constraint if and only if it has `x = 0` or `x = 1`. ### Range Constraint The task of the optimizer is hugely simplified by the concept of _Range Constraints_. Range Constraints allow us to combine the effects of different Algebraic Constraints (and Bus Interactions) on the same variable. In an abstract way, a _Range Constraint_ is just a restriction on values and we can say that a value _satisfies_ a Range Constraint or not. We also say that a Range Constraint _allows_ a value if that value satisfies it. We can connect Range Constraints and variables (a Range Constraint _on_ a variable) and say that an assignment of a variable `v` _satisfies_ a Range Constraint `r` on `v` if the value assigned to `v` satisfies `r`. A Range Constraint `r` on a variable `v` is _valid_ in a Constraint System if any satisfying assignment of the Constraint System also satisfies `r`. During optimization, we derive Range Constraints for expressions and variables from Algebraic Constraints and Bus Interactions and use them to simplify the Constraint System. We also use Range Constraints for a uniform abstraction of Bus Interactions as we will see in a later section. As an example, let us consider the Constraint System consisting of the Algebraic Constraint `x * (x - 1) = 0`. From this Algebraic Constraint the optimizer will synthesize a Range Constraint `r1` on `x` that only allows the values `0` and `1`. The Range Constraint is valid in the Constraint System because, as we saw at the end of the previous section, any satisfying assignment for the Algebraic Constraint must have `x = 0` or `x = 1`. Note that a Range Constraint that allows all values in the field is always valid, but not very useful. Now assume we extend the Constraint System by an additional constraint `(x - 2) * (x - 1) = 0`. 
The Range Constraint `r1` on `x` is still valid in the extended system because additional constraints can only reduce the set of satisfying assignments. If we look at the second constraint in isolation, we can get a Range Constraint `r2` on `x` that allows exactly the values `1` and `2`. Both `r1` and `r2` are valid in the extended system, and so is their intersection, which only allows the value `1`. From this simple example, one can already see the power of these Range Constraints. In a later section we will talk about the various computations that can be performed on Range Constraints including the intersection. #### Concrete Implementation of Range Constraints The abstract concept of Range Constraints is implemented in the optimizer by a combination of a _Wrapping Interval_ and a _Bitmask_. A _Wrapping Interval_ is a pair of field elements `min` and `max`. A value `x` is allowed by the Wrapping Interval if and only if it is part of the sequence `min`, `min + 1`, `min + 2`, ..., `max`. Note that this sequence wraps around the prime `p` of the field. The following is an equivalent definition: If `min <= max` (as seen in the natural numbers), the Wrapping Interval allows a value `x` if and only if `min <= x <= max` (the non-wrapping case). If `min > max`, the Wrapping Interval allows a value `x` if and only if `x >= min` or `x <= max` (the wrapping case). The reason we allow these wrapping intervals is that we can compute the Range Constraint interval of an expression `x + k` for any constant `k` from the Range Constraint interval of `x` without losing information. A _Bitmask_ is a natural number `bitmask` that is interpreted as a bitmask for the natural number representation of field elements. It allows a value `x` if and only if `x & bitmask == x`, i.e. all bits that are set in `x` are also set in `bitmask`. Note that in particular, the bitmask can never disallow the value zero. 
A _Range Constraint_ allows a value if and only if both the bitmask and the wrapping interval allow it. ### Bus Interaction The concept of _Bus Interaction_ is a bit more complicated. The concrete semantics of a bus interaction depends on the environment, i.e. the zkVM we are operating inside and the chips it has. A _Bus Interaction_ consists of a _Bus ID_, a _Multiplicity_ and a _Payload_. The _Bus ID_ is an Algebraic Expression and specifies which bus to interact with. The _Multiplicity_ is an Algebraic Expression and in most cases it should evaluate either to 1 or -1. The _Payload_ is the data that is sent to the bus or received from the bus and is a list of Algebraic Expressions. Usually, one can think of a Bus Interaction to constrain the items in the payload as a tuple. For example, if you have an XOR bus, then a Bus Interaction with payload `(a, b, c)` ensures that `c = a ^ b`. In a bus interaction, there is no intrinsic concept of inputs and outputs (even though some buses can be seen like that). In the example of the XOR bus, it is perfectly fine to use `(a, b, 0xff)` and thus ensure that (on the lower most byte), `b` is the bitwise negation of `a`. Buses can only be properly described across a system of chips or constraint systems. What we want to achieve is that all the buses are balanced: A Bus is _balanced_ if across the whole system and for all payloads, the sum of the multiplicities is zero. Intuitively, with a multiplicity of 1 we can send some payload and we receive it on the other end with a multiplicity of -1. The autoprecompiles optimizer will mostly work with an abstraction of bus interactions that are specifically implemented for each concrete bus type, but these implementations also usually fall into categories, so it should not be difficult to implement this abstraction for a new bus or system. 
### Bus Interaction Abstraction

For the optimizer to be able to handle Bus Interactions, we need to implement the following methods:

- `is_stateful`: For a given Bus ID (a field element), returns if the bus with the given ID is stateful or not.
  If a bus interaction is not stateful, it only affects the payload passed to it and no other elements of the system.
  A memory bus or the execution bridge are examples of stateful busses, while range constraint busses,
  busses modeling arithmetic operations or lookup tables are not stateful.
- `handle_bus_interaction`: Takes a Bus Interaction where its items are represented by Range Constraints
  instead of expressions. It returns a Bus Interaction with Range Constraints such that all payloads that
  satisfy the input Range Constraints and the bus semantics also satisfy the output Range Constraints.
  An implementation that always returns its inputs (or also just fully unconstrained Range Constraints)
  would be correct, but of course you should return Range Constraints that are as tight as possible
  such that the optimizer gets the most out of it.

As an example, let us assume we are modeling a bus that implements a byte constraint, i.e. a bus that takes a single payload item and enforces that it is in the range `0..=255`.
The bus is not stateful since it does not depend on nor affect any other parts of the system.
A simple correct implementation of `handle_bus_interaction` would be to always return a `0xff`-mask Range Constraint for the payload and ignore the input.
It is correct because any assignment that satisfies the bus semantics must have the payload in the range `0..=255`.
Even though this implementation ignores the input Range Constraints, it is also the best possible, since even if the input Range Constraint is something like `200..=300`, the optimizer will not forget it but instead combine it with the one returned by `handle_bus_interaction` and derive `200..=255` as the new Range Constraint for the payload.
Another example is an XOR-bus that takes three payload items `a, b, c` and ensures that all of them are bytes and `a ^ b = c`. This bus is also not stateful. Here, one would implement `handle_bus_interaction` by returning the three byte constraints for the payload items if the input has no restrictions. If two inputs are fully determined (i.e. only a single value satisfies the Range Constraints), we can compute the third and return that as a Range Constraint. We will see later how we can fully optimize away XOR bus interactions using just this abstraction. ### Memory Bus TODO Continue with the abstraction using Range Constraints. ## Combining Range Constraints ## Grouped Expressions The main data structure used for algebraic expressions is the _Grouped Expression_. A Grouped Expression consists of a constant term, a list of linear terms (a list of pairs of a non-zero coefficient and a variable) and a list of quadratic terms (a list of pairs of Grouped Expressions). The variables in the linear terms are unique and the coefficients are required to be non-zero. The uniqueness is enforced by using a map data type. This makes it easy to compare, add and subtract affine expressions, which do not have quadratic terms. It also provides a normal form for affine Algebraic Constraints if we require the coefficient of the first variable (according to some fixed order on the variables) to be one. Note that an Algebraic Constraint can be multiplied by a nonzero factor without changing the semantics. Addition and subtraction of Grouped Expressions are implemented to remove linear terms that cancel each other out, and they perform some checks also in the quadratic terms, but this part is not complete for performance reasons. ## Equivalence Notion ### Introduction and Example We start with some informal intuition and an example. 
We call two Constraint Systems _equivalent_ if every satisfying assignment for one system can be extended to a satisfying assignment for the other system and every such extension leads to the same payloads and multiplicities for all _stateful_ bus interactions in both systems. As an example, consider the two systems System A: ``` x = 8 x + y + z = 12 BusInteraction { bus_id = 2, multiplicity = 1, payload = [x, y, z] } w * (w - 1) = 0 ``` System B:[^variables] ``` y' + z' = 4 BusInteraction { bus_id = 2, multiplicity = 1, payload = [8, y', z'] } ``` [^variables]: In this pair of systems, and throughout the rest of this document, we will use unprimed variables for the first system and primed ones for the second system. When two variables have the same name (modulo primes), that means the variables are informally *intended* to have the same value. We will formalize this idea later. Let us assume that the bus with ID 2 is stateful and allows all combinations of values between 0 and 100 (inclusive). Note that the variables `y`/`y'` and `z`/`z'` are not uniquely determined in either system. The stateful bus acts both as input and output for the system. Note that System B is obtained from System A by substituting `x = 8`, removing `w`, and replacing `y,z` with `y',z'`. All satisfying assignments of System A must have `x = 8` and either `w = 0` or `w = 1`. Such an assignment also satisfies System B (with the variables primed) and it produces the same values for the stateful bus interaction. The converse is a bit more complicated: Satisfying assignments of system B only assign the variables `y'` and `z'`. We can give `y` and `z` the same values in system A, but we need to extend the assignment so that it assigns `x` and `w` and satisfies System A. For `x`, the only choice we have is `x = 8`, but there are two ways to extend the assignment with regards to `w` such that it is still satisfying, `w = 0` or `w = 1`. 
Since both ways to extend the assignment produce the same values in the stateful bus interaction, the systems are equivalent. ### Abstract Equivalence Definition Now let's proceed formally. Let $S = (C, B)$ be a system, defined over a vector of variables, $w$. Let $C$ be the stateless constraints of the system: a formula over $w$. This includes the algebraic constraints and stateless buses. Let $B$ be the stateful bus interactions. It is a fixed-length sequence of interactions. Each interaction is a pair. The first component, $d$, is the data, a fixed-length list of algebraic expressions, so its type is $\mathbb{F}^+$ (sequences of positive length of algebraic expressions over $\mathbb{F}$). Assume the bus ID is represented as the first entry in $d$, for simplicity. The second component of an interaction is $m$, the multiplicity, which is an algebraic expression. The bus interactions will be aggregated into a special kind of multiset. We refer to a map from $\mathbb{F}^+ \to \mathbb{F}$ as a “field multiset” (aka “multiset”). This name reflects an interpretation of the map as a multiset in which each key in the map appears with multiplicity equal to its value.[^fmultiset] Note that these multisets can be added pointwise. That is, for multisets $m$ and $m'$, their sum $m + m'$ maps each key $k$ to $m(k) + m'(k)$. We interpret a bus interaction as a multiset with one key and the specified multiplicity. That is, we define $\textsf{toMs}(d, m)$ to be the field multiset that maps key $d$ to value $m$ and all other keys to value $0$. Then, we define $\Sigma(B)$ to be $\sum_{(d,m) \in B} \textsf{toMs}(d, m)$ [^fmultiset]: A field multiset is slightly different than a standard multiset. In a standard multiset, the multiplicities are natural numbers, not field elements. Thus, in a field multiset, multiplicities can cancel out and can be negative. 
For example, in a field multiset over $\mathbb{F}_2$, for a key $k$, containing $k$ twice is equivalent to containing $k$ zero times.
We use field multisets because the cryptography used to create zkVMs can prove properties of field multisets, but not standard multisets.
While some SMT solvers, like cvc5, do have a theory of standard multisets ([link][bags]), field multisets are more naturally encoded using the theory of arrays, with pointwise addition.

[bags]: https://cvc5.github.io/docs/cvc5-1.3.2/theories/bags.html

Now we can define equivalence between systems.
Assume two systems $S = (C, B)$ and $S' = (C', B')$ in variables $w$ and $w'$, respectively.
Equivalence has two conditions.

The first condition is **completeness**, which says that when $S$ is satisfiable, so is $S'$, and with the same effects (stateful bus interactions).
Formally, there should exist an efficient $E(w) \to w'$ such that: for all $w$ and $s$, if $C(w) \wedge \Sigma(B(w)) = s$, then $C'(w') \wedge \Sigma(B'(w')) = s$, where $w' = E(w)$.

The second condition is **soundness**, which says that when $S'$ is satisfiable, $S$ is too, and with the same effects.
Formally, there should exist an efficient $I(w') \to w$ such that: for all $w'$ and $s$, if $C'(w') \wedge \Sigma(B'(w')) = s$, then $C(w) \wedge \Sigma(B(w)) = s$, where $w = I(w')$.

In the context of powdr, $S$ is the input to the optimization pipeline and $S'$ is the output.
The pipeline also implicitly outputs $E$, which is encoded as follows.
Most of the variables in $w'$ have the same name as some variable in $w$---each takes its value.
Other variables have an entry in the "derived variables", which explains how to compute them from $w$.

### Worked example

We will give two equivalent systems, as examples.
The first system, $S = (C, B)$ is a slightly more complex version of the informal example above, with $b$ in place of $w$.
> $d_0 = (2, x, y, z), m_0 = 1$
>
> $d_1 = (2, x, y, z), m_1 = b$
>
> $d_2 = (2, 8, y, z), m_2 = -b$
>
> $C = (x = 8 \wedge x + y + z = 12 \wedge b(b-1) = 0)$

The second system $S' = (B', C')$ is:

> $d'_0 = (2, 8, y', z'), m'_0 = 1$
>
> $C' = (y' + z' = 4)$

Algorithmically, one optimizes $S$ into $S'$ by the following transformations:

1. Since $x = 8$, substitute $8$ for $x$.
2. Now, we have $d_1 = d_2$, and $m_1 = -m_2$, so remove both bus interactions--they have equal data and their multiplicities sum to 0.
3. $b$ appears in no bus interactions, and in no algebraic constraints with other variables. Moreover, the constraints it does appear in are satisfiable. Remove them.

Now, we prove that these systems are equivalent under the prior definition.
That is, we prove soundness and completeness.

#### Soundness

$I(w') \to w$ is defined to map $w'=(y',z')$ to $w=(x,y,z,b)$ as follows: $x \gets 8, y \gets y', z \gets z', b \gets 0$.

Roughly, we must show:

$$\forall w', \forall s, C'(w') \wedge \Sigma(B'(w')) = s \wedge w = I(w') \implies C(w) \wedge \Sigma(B(w)) = s$$

Which is the same as

$$\forall w', C'(w') \wedge w = I(w') \implies C(w) \wedge \Sigma(B(w)) = \Sigma(B'(w'))$$

Proof:

* Fix $w' = (y', z')$.
* To show the $\implies$, assume
  * $w = I(w')$, that is:
    * $x = 8$
    * $y = y'$
    * $z = z'$
    * $b = 0$
  * $y' + z' = 4$
* And now we need to show each of the following goals:
  * $x = 8$, since it is part of $C(w)$
    * we already have this
  * $x + y + z = 12$, since it is also part of $C(w)$
    * we have this since we have $x=8, y=y', z=z', y'+z'=4$
  * $b(b-1) = 0$, since it is also part of $C(w)$
    * we have this since $b=0$
  * $\mathsf{toMs}((2, 8, y', z'), 1) = \mathsf{toMs}((2, x, y, z), 1) + \mathsf{toMs}((2, x, y, z), b) + \mathsf{toMs}((2, 8, y, z), -b)$
    * First, let $s = \mathsf{toMs}((2, 8, y', z'), 1)$
    * since $y=y'$ and $z=z'$, we have $s = \mathsf{toMs}((2, 8, y, z), 1)$
    * since $x=8$, we have $s = \mathsf{toMs}((2, x, y, z), 1)$
    * since 0 multiplicities are an identity for $+$, we have $s = \mathsf{toMs}((2, x, y, z), 1) + \mathsf{toMs}((2, x, y, z), 0) + \mathsf{toMs}((2, 8, y, z), 0)$
    * since $b=0$, we have our goal: $s = \mathsf{toMs}((2, x, y, z), 1) + \mathsf{toMs}((2, x, y, z), b) + \mathsf{toMs}((2, 8, y, z), -b)$

#### Completeness

$E$ is defined as $y' \gets y, z' \gets z$.

Roughly, we must show:

$$\forall w, \forall s, C(w) \wedge \Sigma(B(w)) = s \wedge w' = E(w) \implies C'(w') \wedge \Sigma(B'(w')) = s$$

Which is the same as

$$\forall w, C(w) \wedge w' = E(w) \implies C'(w') \wedge \Sigma(B(w)) = \Sigma(B'(w'))$$

Proof:

* Fix $w = (x, y, z, b)$.
* Fix $w' = (y', z')$.
* To show the $\implies$, assume
  * $w'=E(w)$, that is:
    * $y' = y$
    * $z' = z$
  * $x = 8$
  * $x + y + z = 12$
* And now we need to show each of the following goals:
  * $y' + z' = 4$
    * we have this from $y' = y, z' = z, x = 8, x + y + z = 12$
  * $\mathsf{toMs}((2, x, y, z), 1) + \mathsf{toMs}((2, x, y, z), b) + \mathsf{toMs}((2, 8, y, z), -b) = \mathsf{toMs}((2, 8, y', z'), 1)$
    * let $s = \mathsf{toMs}((2, x, y, z), 1) + \mathsf{toMs}((2, x, y, z), b) + \mathsf{toMs}((2, 8, y, z), -b)$
    * since $x = 8$, we have: $s = \mathsf{toMs}((2, 8, y, z), 1) + \mathsf{toMs}((2, 8, y, z), b) + \mathsf{toMs}((2, 8, y, z), -b)$
    * by additive inverse for multiset multiplicities we have: $s = \mathsf{toMs}((2, 8, y, z), 1)$
    * by $y'=y, z'=z$, we have our goal: $s = \mathsf{toMs}((2, 8, y', z'), 1)$

### Connection to prior definitions from the literature

Our definition is an instantiation of Ozdemir et al.'s definition of ZKP compiler correctness from the paper ["Bounded Verification for Finite-Field-Blasting in a Compiler for Zero Knowledge Proofs"][1].
Start from their Definition 1. To see this, set:

* their $w$ and $w'$ to our $w$ and $w'$,
* their $x$ and $x'$ to our $s$ (both are $s$),
* their $\phi(x,w)$ to our $C(w) \wedge \Sigma(B(w)) = s$,
* their $\phi'(x',w')$ to our $C'(w') \wedge \Sigma(B'(w')) = s$,
* their $\mathsf{Ext}_x(x)$ to the identity function from $s$ to itself,
* their $\mathsf{Ext}_w(x, w)$ to our $E$, and
* their $\mathsf{Inv}(x', i')$ to our $I$.

This alignment bodes very well for our definition.
Ozdemir et al. proved that a ZKP compiler that is correct by their definition can securely compose with a zkSNARK for the compiler's output language to give a zkSNARK for the compiler's input language.
We would hope to show a similar result using our definition.
But our result would also need to account for the zkVM's design.
Our result would say something like (secure zkSNARK for plonkish constraints) + (correct zkVM) + (correct powdr) = (secure zkSNARK for RISC-V).
### Connections to Georg's definition

Our definition strengthens Georg's slightly.
In his soundness definition, $I$ and $E$ are de-skolemized (their outputs are existentially quantified).
This is equivalent to removing the requirement that $I$ and $E$ be efficient.

An inefficient $E$ really wouldn't work, because then you can't compute the witness $w'$.
Fortunately, powdr outputs $E$ (encoded in the variable derivations).

An inefficient $I$ means that powdr would compose with a zkSNARG, but not a zkSNARK.
That is, it no longer applies to knowledge soundness, just to existential soundness.

### Constraints

In the foregoing, we noted that stateless bus interactions and algebraic constraints are represented by $C$.
Now, we discuss $C$ in more detail.

In terms of SMT theories, the algebraic constraints are just QF_FF (quantifier-free over a finite field) predicates over the variables $w$.
More specifically, they are $\mathbb{F}$ equalities over terms constructed with $+$ and $\times$ in $\mathbb{F}$.

There are a few different bus interactions that contribute to $C$:

* TODO

### Requirements that are not yet formalized.

The definition above is a living object.
There are requirements for powdr that we have not yet formalized, and there may be some that we are not yet aware of.
Most of these are likely weird invariants that OpenVM implicitly assumes in its own definition of correctness.

Currently, we know of one unformalized requirement:

* Under all satisfying assignments, a constraint system must ensure that the difference between the execution step counter in its final execution bus send and its initial execution bus receive is at most the total number of bus interactions.

This requirement is used to prevent overflows related to the step counter and the bus multiplicities.
Powdr is currently violating this requirement[2].
But also, this requirement is not tight.
Many looser requirements could also prevent overflow.
And, powdr might be able to be changed to respect it.
We expect that it will be easy to verify a requirement like this one once we figure out exactly what we need to verify. It is also possible that this requirement will end up being something that is not the responsibility of the optimizer and is instead the responsibility of a different part of the pipeline. ## Optimization Steps The called functions are ``` optimize_exec_bus loop: solver_based_optimization remove_trivial_constraints remove_free_variables remove_disconnected_columns trivial_simplifications optimize_memory LowDegreeBusInteractionOptimizer inliner::replace_constrained_witness_columns optimize_range_constraints trivial_simplifications ``` in addition, in the solver we have to explain: - linearizing - boolean extraction - solving algebraic constraints - simple equivalence - splitting into multiple constraints - solving itself - handling bus interactions - quadratic equivalence detection - exhaustive search - equal zero check ### Constraint System Solver The Constraint System Solver is the core of the optimizer. It is created from a Constraint System, but it does not directly modify the Constraint System. Instead it acts as an information base about the variables in the Constraint System. It can provide tight Range Constraints for variables or expressions, which include the special case of variables being constant. The optimizer uses the Constraint System Solver to substitute such constant variables. It can also determine if two Algebraic Expressions are always different, which is crucial for memory optimization to solve the aliasing problem. 
#### Linearizing #### Boolean Extraction #### Simple Variable Equivalence `try_to_simple_equivalence` #### Splitting Algebraic Constraints Into Multiple Constraints #### Solving Algebraic Constraints ##### Affine Constraints ##### Quadratic Constraints #### Handling Bus Interactions #### Quadratic Equivalence Detection #### Exhaustive Search #### Equal Zero Check [1]: https://eprint.iacr.org/2023/778.pdf [2]: https://github.com/powdr-labs/powdr/issues/3542 ================================================ FILE: autoprecompiles/src/pgo/cell/mod.rs ================================================ use std::{collections::BTreeMap, io::BufWriter}; use itertools::Itertools; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use selection::select_blocks_greedy; use serde::{Deserialize, Serialize}; use crate::{ adapter::{Adapter, AdapterApcWithStats, AdapterExecutionBlocks, AdapterVmConfig, PgoAdapter}, blocks::{BasicBlock, BlockAndStats, SuperBlock}, evaluation::{evaluate_apc, EvaluationResult}, execution_profile::ExecutionProfile, export::{ExportLevel, ExportOptions}, EmpiricalConstraints, PowdrConfig, }; mod selection; /// Trait for autoprecompile candidates. /// Provides ApcWithStats with logic for evaluating a candidate. 
pub trait ApcCandidate: Sized {
    /// Wraps an evaluated APC into a candidate.
    fn create(apc_with_stats: AdapterApcWithStats) -> Self;
    /// Borrows the wrapped APC and its evaluation statistics.
    fn inner(&self) -> &AdapterApcWithStats;
    /// Unwraps the candidate into the APC and its evaluation statistics.
    fn into_inner(self) -> AdapterApcWithStats;
    // cost of the APC before optimization
    fn cost_before_opt(&self) -> usize;
    // cost of the APC after optimization
    fn cost_after_opt(&self) -> usize;
    // value of the APC for each time it is used
    fn value_per_use(&self) -> usize;
}

#[derive(Serialize, Deserialize)]
/// NOTE: When making changes to this field or any of the contained types,
/// JSON_EXPORT_VERSION must be updated
pub struct ApcCandidateJsonExport {
    // execution_frequency
    pub execution_frequency: usize,
    // original instructions (pretty printed)
    pub original_blocks: Vec>,
    // before and after optimization stats
    pub stats: EvaluationResult,
    // width before optimisation, used for software version cells in effectiveness plot
    pub width_before: usize,
    // value used in ranking of candidates
    pub value: usize,
    // cost before optimisation, used for effectiveness calculation
    pub cost_before: f64,
    // cost after optimization, used for effectiveness calculation and ranking of candidates
    pub cost_after: f64,
}

/// PGO strategy that builds an APC for every block in parallel and then
/// greedily selects the most valuable candidates, optionally subject to a
/// total-column budget.
pub struct CellPgo {
    _marker: std::marker::PhantomData<(A, C)>,
    // Execution profile the PGO data was collected from.
    data: ExecutionProfile,
    // Maximum total number of APC columns allowed across all selected
    // candidates; `None` means unbounded.
    max_total_apc_columns: Option,
}

impl CellPgo {
    /// Creates a cell-PGO instance from profiling data and an optional
    /// column budget.
    pub fn with_pgo_data_and_max_columns(
        data: ExecutionProfile,
        max_total_apc_columns: Option,
    ) -> Self {
        Self {
            _marker: std::marker::PhantomData,
            data,
            max_total_apc_columns,
        }
    }
}

/// This version is used by external tools to support multiple versions of the json export.
/// Version should be incremented whenever a breaking change is made to the type (or inner types).
/// Version Log:
/// 0: Serialize only APCs as Vec
/// 1: Add labels to the JSON export
/// 2: Rename apcs[*].original_block.statements -> apcs[*].original_block.instructions
/// 3. Remove apcs[*].apc_candidate_file
/// 4. superblocks: original_blocks: Vec>
const JSON_EXPORT_VERSION: usize = 4;

/// Top-level JSON document written to `apc_candidates.json`.
#[derive(Serialize, Deserialize)]
struct JsonExport {
    // Schema version, see `JSON_EXPORT_VERSION`.
    version: usize,
    // One record per generated candidate.
    apcs: Vec,
    labels: BTreeMap>,
}

impl JsonExport {
    /// Bundles the candidate records and labels under the current schema
    /// version.
    fn new(apcs: Vec, labels: BTreeMap>) -> Self {
        Self {
            version: JSON_EXPORT_VERSION,
            apcs,
            labels,
        }
    }
}

impl + Send + Sync> PgoAdapter for CellPgo {
    type Adapter = A;

    /// Generates APCs for all execution blocks in parallel, optionally
    /// exports a JSON report, and returns the greedily selected
    /// candidates (after skipping `config.skip_autoprecompiles` of them).
    fn create_apcs_with_pgo(
        &self,
        exec_blocks: AdapterExecutionBlocks,
        config: &PowdrConfig,
        vm_config: AdapterVmConfig,
        labels: BTreeMap>,
        empirical_constraints: EmpiricalConstraints,
    ) -> Vec> {
        // Nothing to do if no autoprecompiles were requested.
        if config.autoprecompiles == 0 {
            return vec![];
        }
        let AdapterExecutionBlocks:: {
            blocks,
            execution_bb_runs,
        } = exec_blocks;
        tracing::info!(
            "Generating autoprecompiles for all {} blocks in parallel",
            blocks.len(),
        );
        // Generate apcs in parallel.
        // Produces two matching vectors: one with the APCs and another with the corresponding originating block.
        let (apcs, blocks): (Vec<_>, Vec<_>) = blocks
            .into_par_iter()
            .filter_map(|block_and_stats| {
                let start = std::time::Instant::now();
                // Blocks for which candidate generation fails are dropped.
                let res = try_generate_candidate::(
                    block_and_stats.block.clone(),
                    config,
                    &vm_config,
                    &empirical_constraints,
                )?;
                tracing::debug!(
                    "Generated APC for block {:?}, (took {:?})",
                    block_and_stats.block.start_pcs(),
                    start.elapsed()
                );
                Some((res, block_and_stats))
            })
            .collect();
        // write the APC candidates JSON to disk if the directory is specified.
        if let Some(apc_candidates_dir_path) = &config.apc_candidates_dir_path {
            let apcs = apcs
                .iter()
                .zip_eq(&blocks)
                .map(|(apc, candidate)| apc_candidate_json_export::(apc, candidate))
                .collect();
            let json = JsonExport::new(apcs, labels);
            let json_path = apc_candidates_dir_path.join("apc_candidates.json");
            let file = std::fs::File::create(&json_path)
                .expect("Failed to create file for APC candidates JSON");
            serde_json::to_writer(BufWriter::new(file), &json)
                .expect("Failed to write APC candidates JSON to file");
        }
        // select best candidates
        let budget = self.max_total_apc_columns.unwrap_or(usize::MAX);
        let max_selected = (config.autoprecompiles + config.skip_autoprecompiles) as usize;
        let selection =
            select_blocks_greedy(&apcs, &blocks, budget, max_selected, &execution_bb_runs);
        // skip per config
        let skip = (config.skip_autoprecompiles as usize).min(selection.len());
        // filter and order the apcs using the selection
        // NOTE(review): `take().unwrap()` assumes `selection` contains
        // distinct positions; a duplicate would panic here.
        let mut apcs: Vec<_> = apcs.into_iter().map(|apc| Some(apc.into_inner())).collect();
        selection
            .into_iter()
            .skip(skip)
            .map(|position| apcs[position].take().unwrap())
            .collect()
    }

    fn execution_profile(&self) -> Option<&ExecutionProfile> {
        Some(&self.data)
    }
}

// Try and build an autoprecompile candidate from a superblock.
fn try_generate_candidate>( block: SuperBlock, config: &PowdrConfig, vm_config: &AdapterVmConfig, empirical_constraints: &EmpiricalConstraints, ) -> Option { let export_options = ExportOptions::new( config.apc_candidates_dir_path.clone(), &block.start_pcs(), ExportLevel::OnlyAPC, ); let apc = crate::build::( block.clone(), vm_config.clone(), config.degree_bound, export_options, empirical_constraints, ) .ok()?; let apc_with_stats = evaluate_apc::(vm_config.instruction_handler, apc); Some(C::create(apc_with_stats)) } fn apc_candidate_json_export>( apc: &C, block: &BlockAndStats, ) -> ApcCandidateJsonExport { let original_blocks: Vec<_> = apc .inner() .apc() .block .blocks() .map(|b| BasicBlock { start_pc: b.start_pc, instructions: b.instructions.iter().map(ToString::to_string).collect(), }) .collect(); ApcCandidateJsonExport { execution_frequency: block.count as usize, original_blocks, stats: apc.inner().evaluation_result(), width_before: apc.cost_before_opt(), value: apc .value_per_use() .checked_mul(block.count as usize) .unwrap(), cost_before: apc.cost_before_opt() as f64, cost_after: apc.cost_after_opt() as f64, } } ================================================ FILE: autoprecompiles/src/pgo/cell/selection.rs ================================================ use itertools::Itertools; use priority_queue::PriorityQueue; use serde::{Deserialize, Serialize}; use crate::{ adapter::Adapter, blocks::{find_non_overlapping, BlockAndStats, ExecutionBasicBlockRun}, }; use super::ApcCandidate; #[derive(Clone, Debug, Serialize, Deserialize)] // A candidate block, used during block selection pub struct BlockCandidate { // sequence of basic blocks composing this block pub start_pcs: Vec, // cost of original basic blocks (before optimization) pub cost_before: usize, // cost after optimization pub cost_after: usize, // value gained each time this candidate is used pub value_per_use: usize, // times this block could run in the execution pub execution_count: u32, } impl 
BlockCandidate { pub fn new>( block: &BlockAndStats, apc: &C, ) -> Self { Self { start_pcs: block.block.start_pcs(), cost_before: apc.cost_before_opt(), cost_after: apc.cost_after_opt(), value_per_use: apc.value_per_use(), execution_count: block.count, } } pub fn value(&self) -> usize { (self.execution_count as usize) .checked_mul(self.value_per_use) .unwrap() } pub fn cost(&self) -> usize { self.cost_after } pub fn density(&self) -> Density { Density { value: self.value(), cost: self.cost(), tie: self.start_pcs[0], } } } #[derive(Clone, Debug)] pub struct Density { value: usize, cost: usize, tie: u64, } impl PartialEq for Density { fn eq(&self, other: &Self) -> bool { self.cmp(other) == std::cmp::Ordering::Equal } } impl Eq for Density {} impl PartialOrd for Density { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for Density { // Avoids value/cost integer ratio by using cross-multiplication fn cmp(&self, other: &Self) -> std::cmp::Ordering { let lhs = self.value.checked_mul(other.cost).unwrap(); let rhs = other.value.checked_mul(self.cost).unwrap(); lhs.cmp(&rhs).then_with(|| self.tie.cmp(&other.tie)) } } /// Counts the occurrences of a candidate in a basic block run. /// Returns the count and the sub-runs after the candidate is removed. fn count_and_update_run<'a>( sblock: &BlockCandidate, run: &'a ExecutionBasicBlockRun, ) -> (u32, impl Iterator + 'a) { let sblock_len = sblock.start_pcs.len(); let matches = find_non_overlapping(&run.0, &sblock.start_pcs); let count = matches.len() as u32; let match_intervals = matches.into_iter().flat_map(move |i| [i, i + sblock_len]); let sub_runs = std::iter::once(0) .chain(match_intervals) .chain(std::iter::once(run.0.len())) .tuples() // skip empty sequences .filter(|(start, end)| start != end) .map(|(start, end)| ExecutionBasicBlockRun(run.0[start..end].to_vec())); (count, sub_runs) } /// Count the occurences of a candidate in the execution (multiple basic block runs). 
/// Returns the count and an updated execution with the candidate removed. fn count_and_update_execution( sblock: &BlockCandidate, execution: &[(ExecutionBasicBlockRun, u32)], ) -> (u32, Vec<(ExecutionBasicBlockRun, u32)>) { let mut total_count = 0; let new_execution = execution .iter() .flat_map(|(run, run_count)| { let (count, sub_runs) = count_and_update_run(sblock, run); total_count += count * *run_count; sub_runs.map(|sub_run| (sub_run, *run_count)) }) .collect(); (total_count, new_execution) } /// Greedily select blocks based on density. /// Once a candidate is selected, the value of the remaining candidates are updated to reflect the new execution (with the selection removed). /// Returns the indices of the selected blocks, together with how many times each would run if applied over the execution in the selected order. pub fn select_blocks_greedy>( apcs: &[C], blocks: &[BlockAndStats], budget: usize, max_selected: usize, execution_bb_runs: &[(ExecutionBasicBlockRun, u32)], ) -> Vec { let mut candidates = blocks .iter() .zip_eq(apcs) .map(|(b, apc)| BlockCandidate::new(b, apc)) .collect::>(); // keep candidates by priority. As a candidate is selected, remaining priorities will be (lazily) updated. let mut by_priority: PriorityQueue<_, _> = candidates .iter() .map(BlockCandidate::density) .enumerate() .collect(); let mut selected = vec![]; let mut cumulative_cost = 0; let mut current_execution = execution_bb_runs.to_vec(); while let Some((idx, _prio)) = by_priority.pop() { let c = &mut candidates[idx]; // ignore if too costly if cumulative_cost + c.cost() > budget { // The item does not fit, skip it continue; } // check if the priority of this candidate has changed by re-counting it over the remaining execution. 
let (count, new_execution) = count_and_update_execution(c, ¤t_execution); if count == 0 { // candidate no longer runs, remove it continue; } else if count < c.execution_count { // re-insert with updated priority c.execution_count = count; by_priority.push(idx, c.density()); continue; } // the item fits, increment the cumulative cost and update the execution by removing its occurrences cumulative_cost += c.cost(); current_execution = new_execution; selected.push(idx); if selected.len() >= max_selected { break; } } selected } #[cfg(test)] mod test { use super::*; fn sblock(start_pcs: Vec) -> BlockCandidate { BlockCandidate { start_pcs, cost_before: 0, cost_after: 0, value_per_use: 0, execution_count: 0, } } fn run(pcs: Vec) -> ExecutionBasicBlockRun { ExecutionBasicBlockRun(pcs) } #[test] fn test_count_and_update_run() { // no match: full run returned as single sub-run let r = run(vec![3, 4, 5]); let (count, sub_runs) = count_and_update_run(&sblock(vec![1, 2]), &r); assert_eq!(count, 0); assert_eq!(sub_runs.collect::>(), vec![run(vec![3, 4, 5])]); // match at start let r = run(vec![1, 2, 3, 4]); let (count, sub_runs) = count_and_update_run(&sblock(vec![1, 2]), &r); assert_eq!(count, 1); assert_eq!(sub_runs.collect::>(), vec![run(vec![3, 4])]); // match at end let r = run(vec![1, 2, 3, 4]); let (count, sub_runs) = count_and_update_run(&sblock(vec![3, 4]), &r); assert_eq!(count, 1); assert_eq!(sub_runs.collect::>(), vec![run(vec![1, 2])]); // match in middle let r = run(vec![1, 2, 3, 4]); let (count, sub_runs) = count_and_update_run(&sblock(vec![2, 3]), &r); assert_eq!(count, 1); assert_eq!( sub_runs.collect::>(), vec![run(vec![1]), run(vec![4])] ); // multiple matches let r = run(vec![1, 2, 3, 1, 2, 4]); let (count, sub_runs) = count_and_update_run(&sblock(vec![1, 2]), &r); assert_eq!(count, 2); assert_eq!( sub_runs.collect::>(), vec![run(vec![3]), run(vec![4])] ); // full run is the match: no sub-runs let r = run(vec![1, 2, 3]); let (count, sub_runs) = 
count_and_update_run(&sblock(vec![1, 2, 3]), &r); assert_eq!(count, 1); assert_eq!(sub_runs.collect::>(), vec![]); } } ================================================ FILE: autoprecompiles/src/pgo/instruction.rs ================================================ use std::{cmp::Reverse, collections::BTreeMap}; use itertools::Itertools; use crate::{ adapter::{Adapter, AdapterApcWithStats, AdapterExecutionBlocks, AdapterVmConfig, PgoAdapter}, execution_profile::ExecutionProfile, pgo::create_apcs_for_all_blocks, EmpiricalConstraints, PowdrConfig, }; pub struct InstructionPgo { _marker: std::marker::PhantomData, data: ExecutionProfile, } impl InstructionPgo { pub fn with_pgo_data(data: ExecutionProfile) -> Self { Self { _marker: std::marker::PhantomData, data, } } } impl PgoAdapter for InstructionPgo { type Adapter = A; fn create_apcs_with_pgo( &self, exec_blocks: AdapterExecutionBlocks, config: &PowdrConfig, vm_config: AdapterVmConfig, _labels: BTreeMap>, empirical_constraints: EmpiricalConstraints, ) -> Vec> { tracing::info!( "Generating autoprecompiles with instruction PGO for {} blocks", exec_blocks.blocks.len() ); if config.autoprecompiles == 0 { return vec![]; } let blocks = exec_blocks .blocks .into_iter() // sort by frequency * number of instructions in the block, descending .sorted_by_key(|block_and_stats| { Reverse(block_and_stats.count * block_and_stats.block.instructions().count() as u32) }) .map(|block_and_stats| { let block = block_and_stats.block; assert!(block.is_basic_block(), "Instruction PGO does not support superblocks"); let frequency = block_and_stats.count; let number_of_instructions = block.instructions().count(); let value = frequency * number_of_instructions as u32; tracing::debug!( "Basic block start_pc: {}, value: {}, frequency: {}, number_of_instructions: {}", block.pcs().next().unwrap(), value, frequency, number_of_instructions, ); block }) .collect(); create_apcs_for_all_blocks::( blocks, config, vm_config, empirical_constraints, ) } fn 
execution_profile(&self) -> Option<&ExecutionProfile> { Some(&self.data) } } ================================================ FILE: autoprecompiles/src/pgo/mod.rs ================================================ use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; use strum::{Display, EnumString}; use crate::{ adapter::{Adapter, AdapterApcWithStats, AdapterVmConfig}, blocks::SuperBlock, evaluation::evaluate_apc, execution_profile::ExecutionProfile, export::{ExportLevel, ExportOptions}, EmpiricalConstraints, PowdrConfig, }; mod cell; mod instruction; mod none; pub use { cell::{ApcCandidate, CellPgo}, instruction::InstructionPgo, none::NonePgo, }; /// Three modes for profiler guided optimization with different cost functions to sort the basic blocks by descending cost and select the most costly ones to accelerate. #[derive(Default)] pub enum PgoConfig { /// value = cells saved per apc * times executed /// cost = number of columns in the apc /// constraint of max total columns Cell(ExecutionProfile, Option), /// value = instruction per apc * times executed Instruction(ExecutionProfile), /// value = instruction per apc #[default] None, } impl PgoConfig { /// Returns the number of times a certain pc was executed in the profile. 
pub fn pc_execution_count(&self, pc: u64) -> Option { match self { PgoConfig::Cell(prof, _) | PgoConfig::Instruction(prof) => { prof.pc_count.get(&pc).copied() } PgoConfig::None => None, } } } /// CLI enum for PGO mode #[derive(Copy, Clone, Debug, EnumString, Display, Default)] #[strum(serialize_all = "lowercase")] pub enum PgoType { /// cost = cells saved per apc * times executed #[default] Cell, /// cost = instruction per apc * times executed Instruction, /// cost = instruction per apc None, } pub fn pgo_config( pgo: PgoType, max_columns: Option, execution_profile: ExecutionProfile, ) -> PgoConfig { match pgo { PgoType::Cell => PgoConfig::Cell(execution_profile, max_columns), PgoType::Instruction => PgoConfig::Instruction(execution_profile), PgoType::None => PgoConfig::None, } } // Only used for PgoConfig::Instruction and PgoConfig::None, // because PgoConfig::Cell caches all APCs in sorting stage. fn create_apcs_for_all_blocks( blocks: Vec>, config: &PowdrConfig, vm_config: AdapterVmConfig, empirical_constraints: EmpiricalConstraints, ) -> Vec> { let n_acc = config.autoprecompiles as usize; tracing::info!("Generating {n_acc} autoprecompiles in parallel"); blocks .into_par_iter() .skip(config.skip_autoprecompiles as usize) .take(n_acc) .map(|superblock| { tracing::debug!( "Accelerating block of length {} and start pcs {:?}", superblock.instructions().count(), superblock.start_pcs(), ); let export_options = ExportOptions::new( config.apc_candidates_dir_path.clone(), &superblock.start_pcs(), ExportLevel::OnlyAPC, ); let apc = crate::build::( superblock.clone(), vm_config.clone(), config.degree_bound, export_options, &empirical_constraints, ) .unwrap(); evaluate_apc::(vm_config.instruction_handler, apc) }) .collect() } ================================================ FILE: autoprecompiles/src/pgo/none.rs ================================================ use std::{cmp::Reverse, collections::BTreeMap}; use derivative::Derivative; use itertools::Itertools; use crate::{ 
adapter::{Adapter, AdapterApcWithStats, AdapterExecutionBlocks, AdapterVmConfig, PgoAdapter}, pgo::create_apcs_for_all_blocks, EmpiricalConstraints, PowdrConfig, }; #[derive(Derivative)] #[derivative(Default(bound = ""))] pub struct NonePgo { _marker: std::marker::PhantomData, } impl PgoAdapter for NonePgo { type Adapter = A; fn create_apcs_with_pgo( &self, exec_blocks: AdapterExecutionBlocks, config: &PowdrConfig, vm_config: AdapterVmConfig, _labels: BTreeMap>, empirical_constraints: EmpiricalConstraints, ) -> Vec> { let blocks = exec_blocks .blocks .into_iter() // sort by number of instructions in the block, descending .sorted_by_key(|block_and_stats| { Reverse(block_and_stats.block.instructions().count() as u32) }) .map(|block_and_stats| { let block = block_and_stats.block; assert!( block.is_basic_block(), "None PGO does not support superblocks" ); tracing::debug!( "Basic block start_pc: {}, number_of_instructions: {}", block.pcs().next().unwrap(), block.instructions().count(), ); block }) .collect(); create_apcs_for_all_blocks::( blocks, config, vm_config, empirical_constraints, ) } } ================================================ FILE: autoprecompiles/src/powdr.rs ================================================ use std::collections::BTreeMap; use std::sync::Arc; use itertools::Itertools; use powdr_expression::visitors::{AllChildren, ExpressionVisitable}; use powdr_number::FieldElement; use crate::expression::{AlgebraicExpression, AlgebraicReference}; use crate::SymbolicMachine; pub fn make_refs_zero(expr: &mut AlgebraicExpression) { let zero = AlgebraicExpression::Number(T::zero()); expr.pre_visit_expressions_mut(&mut |expr| { if let AlgebraicExpression::Reference(AlgebraicReference { .. 
}) = expr { *expr = zero.clone(); } }); } pub fn make_bool(expr: AlgebraicExpression) -> AlgebraicExpression { let one = AlgebraicExpression::Number(T::from(1u64)); expr.clone() * (expr - one) } pub fn substitute_subexpressions( expr: &mut AlgebraicExpression, sub: &BTreeMap, AlgebraicExpression>, ) { expr.pre_visit_expressions_mut(&mut |expr| { if let Some(sub_expr) = sub.get(expr) { *expr = sub_expr.clone(); } }); } pub trait UniqueReferences<'a, T: 'a, R> { /// Returns an iterator over the unique references fn unique_references(&'a self) -> impl Iterator; } impl<'a, T: 'a, E: AllChildren>> UniqueReferences<'a, T, AlgebraicReference> for E { // Output unique column references sorted by ascending id of original instruction columns fn unique_references(&'a self) -> impl Iterator { self.all_children() .filter_map(|e| { if let AlgebraicExpression::Reference(r) = e { Some(r.clone()) } else { None } }) .map(|r| (r.id, r)) .collect::>() .into_values() } } /// Globalizes the references in the machine by appending a suffix to their names /// and offsetting their IDs to start from `curr_id`. /// Returns: /// - The updated `next_global_id`. /// - The substitutions, mapping the local reference IDs to the global ones. /// - The updated machine with globalized references. 
pub fn globalize_references( machine: SymbolicMachine, mut next_global_id: u64, suffix: usize, ) -> (u64, Vec, SymbolicMachine) { let unique_reference_ids = machine.unique_references().map(|r| r.id).collect_vec(); let machine_size = unique_reference_ids.len() as u64; assert_eq!( *unique_reference_ids.iter().max().unwrap(), machine_size - 1, "The reference ids must be contiguous" ); let machine = globalize_reference_names(machine, suffix); let machine = offset_reference_ids(machine, next_global_id); let subs = (next_global_id..(next_global_id + machine_size)).collect::>(); next_global_id += machine_size; (next_global_id, subs, machine) } /// Globalizes the names of references in the machine by appending a suffix. fn globalize_reference_names( mut machine: SymbolicMachine, suffix: usize, ) -> SymbolicMachine { // Allocate a new string for each *unique* reference in the machine let globalized_name = |name| Arc::new(format!("{name}_{suffix}")); let name_by_id = machine .unique_references() .map(|reference| (reference.id, globalized_name(reference.name))) .collect::>(); // Update the names machine.pre_visit_expressions_mut(&mut |e| { if let AlgebraicExpression::Reference(r) = e { r.name = name_by_id.get(&r.id).unwrap().clone(); } }); machine } fn offset_reference_ids( mut machine: SymbolicMachine, offset: u64, ) -> SymbolicMachine { machine.pre_visit_expressions_mut(&mut |e| { if let AlgebraicExpression::Reference(r) = e { r.id += offset; } }); machine } ================================================ FILE: autoprecompiles/src/range_constraint_optimizer.rs ================================================ use std::collections::BTreeMap; use std::fmt::Display; use std::hash::Hash; use itertools::Itertools; use powdr_constraint_solver::constraint_system::{ AlgebraicConstraint, BusInteraction, BusInteractionHandler, ConstraintSystem, }; use powdr_constraint_solver::grouped_expression::GroupedExpression; use powdr_constraint_solver::inliner::DegreeBound; use 
powdr_constraint_solver::range_constraint::RangeConstraint; use powdr_constraint_solver::solver::{new_solver, Solver}; use powdr_number::FieldElement; pub type RangeConstraints = Vec<(GroupedExpression, RangeConstraint)>; /// The requested range constraint cannot be implemented. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MakeRangeConstraintsError(pub String); pub trait RangeConstraintHandler { /// If the bus interaction *only* enforces range constraints, returns them /// as a map of expressions to range constraints. /// /// For example: /// - If a bus interaction takes two arguments `a` and `b` and enforces the /// range constraints `0 <= a < 2^b`, it is *not* a pure range constraint if /// both values are unknown (because the allowed values of `a` depend on `b`) /// - On the other hand, if `b` is known, it is a pure range constraint. /// /// Any stateful bus interaction is not a pure range constraint. /// This function will only be called with bus interactions with multiplicity 1. fn pure_range_constraints( &self, bus_interaction: &BusInteraction>, ) -> Option>; /// Given a set of range constraints, returns a list of bus interactions /// that implements them. The implementation is free to implement multiple /// range constraints using a single bus interaction. /// As all input range constraints are unconditional, the multiplicity of /// the returned bus interactions should be 1. /// If one of the range constraints cannot be implemented exactly, an error /// is returned. For soundness, the implementation should *never* relax the /// range constraint. fn batch_make_range_constraints( &self, range_constraints: RangeConstraints, ) -> Result>>, MakeRangeConstraintsError>; } /// Optimizes range constraints, minimizing the number of bus interactions. 
/// /// This step: /// - removes range constraints that are already implied by existing constraints /// - batches several range constraints into one bus interaction, if possible /// - implements bit constraints via polynomial constraints, if the degree bound allows pub fn optimize_range_constraints( mut system: ConstraintSystem, bus_interaction_handler: impl BusInteractionHandler + RangeConstraintHandler + Clone, degree_bound: DegreeBound, ) -> ConstraintSystem { // Remove all pure range constraints, but collect what was removed. // We store the expressions to constrain in a vector, so that we can keep the order of // the range constraints as much as possible. let mut to_constrain = Vec::new(); let mut range_constraints = BTreeMap::new(); system.bus_interactions.retain(|bus_int| { if bus_int.multiplicity != GroupedExpression::from_number(T::one()) { // Most range constraints are unconditional in practice, it's probably not // worth dealing with the conditional ones. return true; } match bus_interaction_handler.pure_range_constraints(bus_int) { Some(new_range_constraints) => { to_constrain.extend(new_range_constraints.iter().map(|(expr, _)| expr.clone())); for (expr, rc) in new_range_constraints { let existing_rc = range_constraints .entry(expr) .or_insert_with(RangeConstraint::default); *existing_rc = existing_rc.conjunction(&rc); } false } None => true, } }); // Filter range constraints that are already implied by existing constraints. // TODO: They could also be implied by each other. let mut solver = new_solver(system.clone(), bus_interaction_handler.clone()); solver.solve().unwrap(); let to_constrain = to_constrain .into_iter() .unique() .map(|expr| { let rc = range_constraints.remove(&expr).unwrap(); (expr, rc) }) .filter(|(expr, rc)| { let current_rc = solver.range_constraint_for_expression(expr); current_rc != current_rc.conjunction(rc) }) .collect::>(); // Implement bit constraints via polynomial constraints, if the degree bound allows. 
let mut bit_constraints = Vec::new(); let to_constrain = to_constrain .into_iter() .filter(|(expr, rc)| { let bit_range_constraint = AlgebraicConstraint::assert_bool(expr.clone()); if rc == &RangeConstraint::from_mask(1) && bit_range_constraint.degree() <= degree_bound.identities { bit_constraints.push(bit_range_constraint); false } else { true } }) .collect(); // Create all range constraints in batch and add them to the system. // Note that unwrapping here should be fine, because we only pass range constraints // that were returned from `pure_range_constraints`, so clearly the VM is able to // implement them. let range_constraints = bus_interaction_handler .batch_make_range_constraints(to_constrain) .unwrap(); for bus_interaction in &range_constraints { assert_eq!(bus_interaction.multiplicity.try_to_number(), Some(T::one())); } system.bus_interactions.extend(range_constraints); system.algebraic_constraints.extend(bit_constraints); system } /// Utility functions useful for implementing `batch_make_range_constraints`. pub mod utils { use itertools::Itertools; use powdr_constraint_solver::{ grouped_expression::GroupedExpression, range_constraint::RangeConstraint, }; use powdr_number::FieldElement; use std::fmt::Display; use std::hash::Hash; use crate::range_constraint_optimizer::RangeConstraints; /// If the range constraints is the range 0..(2^bits - 1), returns Some(bits). pub fn range_constraint_to_num_bits( range_constraint: &RangeConstraint, ) -> Option { (0..30).find(|num_bits| { let mask = (1u64 << num_bits) - 1; range_constraint == &RangeConstraint::from_mask(mask) }) } /// Given a set of range constraints, filters out the byte constraints and returns them. 
pub fn filter_byte_constraints( range_constraints: &mut RangeConstraints, ) -> Vec> { let mut byte_constraints = Vec::new(); range_constraints.retain(|(expr, rc)| match range_constraint_to_num_bits(rc) { Some(8) => { byte_constraints.push(expr.clone()); false } _ => true, }); byte_constraints.into_iter().unique().collect() } } ================================================ FILE: autoprecompiles/src/stats_logger.rs ================================================ use std::hash::Hash; use std::{fmt::Display, time::Instant}; use itertools::Itertools; use powdr_constraint_solver::constraint_system::ConstraintSystem; use powdr_constraint_solver::indexed_constraint_system::IndexedConstraintSystem; use powdr_number::FieldElement; use crate::{powdr::UniqueReferences, SymbolicMachine}; pub struct StatsLogger { start_time: Instant, step_start_time: Instant, } impl StatsLogger { pub fn start(system: impl Into) -> Self { log::info!("Starting optimization - {}", system.into()); let start_time = Instant::now(); let step_start_time = start_time; StatsLogger { start_time, step_start_time, } } pub fn log(&mut self, step: &str, system: impl Into) { let elapsed = self.step_start_time.elapsed().as_secs_f32(); log::debug!( "After {step:<32} (took {elapsed:7.4} s) - {}", system.into() ); self.step_start_time = Instant::now(); } pub fn finalize(self, system: impl Into) { let elapsed = self.start_time.elapsed().as_secs_f32(); log::info!( "Optimization took (took {elapsed:7.4} s) - {}", system.into() ); } } #[derive(Debug, Clone, PartialEq, Eq)] pub struct Stats { num_constraints: usize, num_bus_interactions: usize, num_witness_columns: usize, } impl Display for Stats { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "Constraints: {}, Bus Interactions: {}, Witness Columns: {}", self.num_constraints, self.num_bus_interactions, self.num_witness_columns ) } } impl From<&SymbolicMachine